Merge yet again with the main repository.
This commit is contained in:
12
SConstruct
12
SConstruct
@@ -663,10 +663,14 @@ if not py_getvar('Py_ENABLE_SHARED'):
|
||||
|
||||
py_libs = []
|
||||
for lib in py_getvar('LIBS').split() + py_getvar('SYSLIBS').split():
|
||||
assert lib.startswith('-l')
|
||||
lib = lib[2:]
|
||||
if lib not in py_libs:
|
||||
py_libs.append(lib)
|
||||
if not lib.startswith('-l'):
|
||||
# Python requires some special flags to link (e.g. -framework
|
||||
# common on OS X systems), assume appending preserves order
|
||||
main.Append(LINKFLAGS=[lib])
|
||||
else:
|
||||
lib = lib[2:]
|
||||
if lib not in py_libs:
|
||||
py_libs.append(lib)
|
||||
py_libs.append(py_version)
|
||||
|
||||
main.Append(CPPPATH=py_includes)
|
||||
|
||||
@@ -483,6 +483,15 @@ def makeDualRoot(testSystem, driveSystem, dumpfile):
|
||||
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
|
||||
|
||||
if hasattr(testSystem, 'realview'):
|
||||
self.etherlink.int0 = Parent.testsys.realview.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.realview.ethernet.interface
|
||||
elif hasattr(testSystem, 'tsunami'):
|
||||
self.etherlink.int0 = Parent.testsys.tsunami.ethernet.interface
|
||||
self.etherlink.int1 = Parent.drivesys.tsunami.ethernet.interface
|
||||
else:
|
||||
fatal("Don't know how to connect these system together")
|
||||
|
||||
if dumpfile:
|
||||
self.etherdump = EtherDump(file=dumpfile)
|
||||
self.etherlink.dump = Parent.etherdump
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
# Authors: Lisa Hsu
|
||||
|
||||
# system options
|
||||
parser.add_option("-c", "--cpu-type", type="choice", default="atomic",
|
||||
parser.add_option("--cpu-type", type="choice", default="atomic",
|
||||
choices = ["atomic", "timing", "detailed", "inorder"],
|
||||
help = "type of cpu to run with")
|
||||
parser.add_option("-n", "--num-cpus", type="int", default=1)
|
||||
@@ -63,7 +63,8 @@ parser.add_option("--work-end-exit-count", action="store", type="int",
|
||||
help="exit at specified work end count")
|
||||
parser.add_option("--work-begin-exit-count", action="store", type="int",
|
||||
help="exit at specified work begin count")
|
||||
|
||||
parser.add_option("--init-param", action="store", type="int", default=0,
|
||||
help="Parameter available in simulation with m5 initparam")
|
||||
|
||||
# Checkpointing options
|
||||
###Note that performing checkpointing via python script files will override
|
||||
@@ -84,6 +85,10 @@ parser.add_option("--work-end-checkpoint-count", action="store", type="int",
|
||||
help="checkpoint at specified work end count")
|
||||
parser.add_option("--work-cpus-checkpoint-count", action="store", type="int",
|
||||
help="checkpoint and exit when active cpu count is reached")
|
||||
parser.add_option("--restore-with-cpu", action="store", type="choice",
|
||||
default="atomic", choices = ["atomic", "timing",
|
||||
"detailed", "inorder"],
|
||||
help = "cpu type for restoring from a checkpoint")
|
||||
|
||||
|
||||
# CPU Switching - default switch model goes from a checkpoint
|
||||
|
||||
@@ -60,7 +60,15 @@ def setCPUClass(options):
|
||||
test_mem_mode = 'atomic'
|
||||
|
||||
if not atomic:
|
||||
if options.checkpoint_restore != None or options.fast_forward:
|
||||
if options.checkpoint_restore != None:
|
||||
if options.restore_with_cpu != options.cpu_type:
|
||||
CPUClass = TmpClass
|
||||
class TmpClass(AtomicSimpleCPU): pass
|
||||
else:
|
||||
if options.restore_with_cpu != "atomic":
|
||||
test_mem_mode = 'timing'
|
||||
|
||||
elif options.fast_forward:
|
||||
CPUClass = TmpClass
|
||||
class TmpClass(AtomicSimpleCPU): pass
|
||||
else:
|
||||
|
||||
@@ -731,7 +731,7 @@ class vpr_route(vpr):
|
||||
'-first_iter_pres_fac', '4', '-initial_pres_fac', '8' ]
|
||||
output = 'route_log.out'
|
||||
|
||||
all = [ ammp, applu, apsi, art110, art470, equake, facerec, fma3d, galgel,
|
||||
all = [ ammp, applu, apsi, art, art110, art470, equake, facerec, fma3d, galgel,
|
||||
lucas, mesa, mgrid, sixtrack, swim, wupwise, bzip2_source,
|
||||
bzip2_graphic, bzip2_program, crafty, eon_kajiya, eon_cook,
|
||||
eon_rushmeier, gap, gcc_166, gcc_200, gcc_expr, gcc_integrate,
|
||||
|
||||
@@ -151,6 +151,8 @@ if options.kernel is not None:
|
||||
if options.script is not None:
|
||||
test_sys.readfile = options.script
|
||||
|
||||
test_sys.init_param = options.init_param
|
||||
|
||||
test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)]
|
||||
|
||||
CacheConfig.config_cache(options, test_sys)
|
||||
@@ -190,8 +192,8 @@ if len(bm) == 2:
|
||||
elif buildEnv['TARGET_ISA'] == 'x86':
|
||||
drive_sys = makeX86System(drive_mem_mode, np, bm[1])
|
||||
elif buildEnv['TARGET_ISA'] == 'arm':
|
||||
drive_sys = makeArmSystem(drive_mem_mode,
|
||||
machine_options.machine_type, bm[1])
|
||||
drive_sys = makeArmSystem(drive_mem_mode, options.machine_type, bm[1])
|
||||
|
||||
drive_sys.cpu = DriveCPUClass(cpu_id=0)
|
||||
drive_sys.cpu.connectAllPorts(drive_sys.membus)
|
||||
if options.fastmem:
|
||||
@@ -199,6 +201,7 @@ if len(bm) == 2:
|
||||
if options.kernel is not None:
|
||||
drive_sys.kernel = binary(options.kernel)
|
||||
|
||||
drive_sys.init_param = options.init_param
|
||||
root = makeDualRoot(test_sys, drive_sys, options.etherdump)
|
||||
elif len(bm) == 1:
|
||||
root = Root(system=test_sys)
|
||||
|
||||
@@ -67,7 +67,8 @@ def define_options(parser):
|
||||
def create_system(options, system, piobus = None, dma_devices = []):
|
||||
|
||||
system.ruby = RubySystem(clock = options.clock,
|
||||
stats_filename = options.ruby_stats)
|
||||
stats_filename = options.ruby_stats,
|
||||
no_mem_vec = options.use_map)
|
||||
ruby = system.ruby
|
||||
|
||||
protocol = buildEnv['PROTOCOL']
|
||||
@@ -154,11 +155,8 @@ def create_system(options, system, piobus = None, dma_devices = []):
|
||||
|
||||
ruby_profiler = RubyProfiler(ruby_system = ruby,
|
||||
num_of_sequencers = len(cpu_sequencers))
|
||||
ruby_tracer = RubyTracer(ruby_system = ruby)
|
||||
|
||||
ruby.network = network
|
||||
ruby.profiler = ruby_profiler
|
||||
ruby.tracer = ruby_tracer
|
||||
ruby.mem_size = total_mem_size
|
||||
ruby._cpu_ruby_ports = cpu_sequencers
|
||||
ruby.random_seed = options.random_seed
|
||||
|
||||
@@ -91,7 +91,7 @@ ElfFile('libelf_msize.c')
|
||||
|
||||
m4env = main.Clone()
|
||||
if m4env['GCC']:
|
||||
major,minor,dot = [ int(x) for x in m4env['CXXVERSION'].split('.')]
|
||||
major,minor,dot = [int(x) for x in m4env['GCC_VERSION'].split('.')]
|
||||
if major >= 4:
|
||||
m4env.Append(CCFLAGS=['-Wno-pointer-sign'])
|
||||
m4env.Append(CCFLAGS=['-Wno-implicit'])
|
||||
|
||||
@@ -851,8 +851,8 @@ def makeEnv(label, objsfx, strip = False, **kwargs):
|
||||
swig_env.Append(CCFLAGS='-Wno-uninitialized')
|
||||
swig_env.Append(CCFLAGS='-Wno-sign-compare')
|
||||
swig_env.Append(CCFLAGS='-Wno-parentheses')
|
||||
swig_env.Append(CCFLAGS='-Wno-unused-label')
|
||||
if compareVersions(env['GCC_VERSION'], '4.6.0') != -1:
|
||||
swig_env.Append(CCFLAGS='-Wno-unused-label')
|
||||
swig_env.Append(CCFLAGS='-Wno-unused-but-set-variable')
|
||||
|
||||
werror_env = new_env.Clone()
|
||||
|
||||
@@ -190,12 +190,15 @@ let {{
|
||||
exec_output += PredOpExecute.subst(loadsymbolIop)
|
||||
|
||||
initparamCode = '''
|
||||
Rt = PseudoInst::initParam(xc->tcBase());
|
||||
uint64_t ip_val = PseudoInst::initParam(xc->tcBase());
|
||||
R0 = bits(ip_val, 31, 0);
|
||||
R1 = bits(ip_val, 63, 32);
|
||||
'''
|
||||
|
||||
initparamIop = InstObjParams("initparam", "Initparam", "PredOp",
|
||||
{ "code": initparamCode,
|
||||
"predicate_test": predicateTest })
|
||||
"predicate_test": predicateTest },
|
||||
["IsNonSpeculative"])
|
||||
header_output += BasicDeclare.subst(initparamIop)
|
||||
decoder_output += BasicConstructor.subst(initparamIop)
|
||||
exec_output += PredOpExecute.subst(initparamIop)
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
* Authors: Gabe Black
|
||||
* Korey Sewell
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#include "arch/mips/faults.hh"
|
||||
@@ -118,7 +120,7 @@ MipsFaultBase::setExceptionState(ThreadContext *tc, uint8_t excCode)
|
||||
DPRINTF(MipsPRA, "PC: %s\n", pc);
|
||||
bool delay_slot = pc.pc() + sizeof(MachInst) != pc.npc();
|
||||
tc->setMiscRegNoEffect(MISCREG_EPC,
|
||||
pc.pc() - delay_slot ? sizeof(MachInst) : 0);
|
||||
pc.pc() - (delay_slot ? sizeof(MachInst) : 0));
|
||||
|
||||
// Set Cause_EXCCODE field
|
||||
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
* Authors: Gabe Black
|
||||
* Korey Sewell
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#ifndef __MIPS_FAULTS_HH__
|
||||
@@ -88,7 +90,7 @@ class MipsFaultBase : public FaultBase
|
||||
virtual FaultVect base(ThreadContext *tc) const
|
||||
{
|
||||
StatusReg status = tc->readMiscReg(MISCREG_STATUS);
|
||||
if (status.bev)
|
||||
if (!status.bev)
|
||||
return tc->readMiscReg(MISCREG_EBASE);
|
||||
else
|
||||
return 0xbfc00200;
|
||||
@@ -167,7 +169,7 @@ class CoprocessorUnusableFault : public MipsFault<CoprocessorUnusableFault>
|
||||
if (FullSystem) {
|
||||
CauseReg cause = tc->readMiscReg(MISCREG_CAUSE);
|
||||
cause.ce = coProcID;
|
||||
tc->setMiscReg(MISCREG_CAUSE, cause);
|
||||
tc->setMiscRegNoEffect(MISCREG_CAUSE, cause);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -179,7 +181,8 @@ class InterruptFault : public MipsFault<InterruptFault>
|
||||
offset(ThreadContext *tc) const
|
||||
{
|
||||
CauseReg cause = tc->readMiscRegNoEffect(MISCREG_CAUSE);
|
||||
return cause.iv ? 0x200 : 0x000;
|
||||
// offset 0x200 for release 2, 0x180 for release 1.
|
||||
return cause.iv ? 0x200 : 0x180;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -251,9 +254,10 @@ class TlbFault : public AddressFault<T>
|
||||
StaticInstPtr inst = StaticInst::nullStaticInstPtr)
|
||||
{
|
||||
if (FullSystem) {
|
||||
DPRINTF(MipsPRA, "Fault %s encountered.\n", name());
|
||||
tc->pcState(this->vect(tc));
|
||||
DPRINTF(MipsPRA, "Fault %s encountered.\n", this->name());
|
||||
Addr vect = this->vect(tc);
|
||||
setTlbExceptionState(tc, this->code());
|
||||
tc->pcState(vect);
|
||||
} else {
|
||||
AddressFault<T>::invoke(tc, inst);
|
||||
}
|
||||
|
||||
@@ -1253,7 +1253,7 @@ decode OPCODE_HI default Unknown::unknown() {
|
||||
//When rs=L1
|
||||
//Note: "1. Format type L is legal only if 64-bit
|
||||
//floating point operations are enabled."
|
||||
0x5: decode FUNCTION_HI {
|
||||
0x5: decode FUNCTION {
|
||||
format FloatConvertOp {
|
||||
0x20: cvt_s_l({{ val = Fs_ud; }}, ToSingle);
|
||||
0x21: cvt_d_l({{ val = Fs_ud; }}, ToDouble);
|
||||
|
||||
@@ -55,7 +55,7 @@ unameFunc(SyscallDesc *desc, int callnum, LiveProcess *process,
|
||||
|
||||
strcpy(name->sysname, "Linux");
|
||||
strcpy(name->nodename,"m5.eecs.umich.edu");
|
||||
strcpy(name->release, "2.4.20");
|
||||
strcpy(name->release, "2.6.35");
|
||||
strcpy(name->version, "#1 Mon Aug 18 11:32:15 EDT 2003");
|
||||
strcpy(name->machine, "mips");
|
||||
|
||||
|
||||
@@ -55,7 +55,7 @@ const int NumIntRegs = NumIntArchRegs + NumIntSpecialRegs; //HI & LO Regs
|
||||
const int NumFloatRegs = NumFloatArchRegs + NumFloatSpecialRegs;//
|
||||
|
||||
const uint32_t MIPS32_QNAN = 0x7fbfffff;
|
||||
const uint64_t MIPS64_QNAN = ULL(0x7fbfffffffffffff);
|
||||
const uint64_t MIPS64_QNAN = ULL(0x7ff7ffffffffffff);
|
||||
|
||||
enum FPControlRegNums {
|
||||
FLOATREG_FIR = NumFloatArchRegs,
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
* Authors: Nathan Binkert
|
||||
* Steve Reinhardt
|
||||
* Jaidev Patwardhan
|
||||
* Zhengxing Li
|
||||
* Deyuan Guo
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
@@ -310,18 +312,6 @@ Fault
|
||||
TLB::translateData(RequestPtr req, ThreadContext *tc, bool write)
|
||||
{
|
||||
if (!FullSystem) {
|
||||
//@TODO: This should actually use TLB instead of going directly
|
||||
// to the page table in syscall mode.
|
||||
/**
|
||||
* Check for alignment faults
|
||||
*/
|
||||
if (req->getVaddr() & (req->getSize() - 1)) {
|
||||
DPRINTF(TLB, "Alignment Fault on %#x, size = %d", req->getVaddr(),
|
||||
req->getSize());
|
||||
return new AddressErrorFault(req->getVaddr(), write);
|
||||
}
|
||||
|
||||
|
||||
Process * p = tc->getProcessPtr();
|
||||
|
||||
Fault fault = p->pTable->translate(req);
|
||||
|
||||
@@ -42,26 +42,34 @@ microcode = '''
|
||||
def macroop IN_R_I {
|
||||
.adjust_imm trimImm(8)
|
||||
limm t1, imm, dataSize=asz
|
||||
mfence
|
||||
ld reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop IN_R_R {
|
||||
zexti t2, regm, 15, dataSize=8
|
||||
mfence
|
||||
ld reg, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop OUT_I_R {
|
||||
.adjust_imm trimImm(8)
|
||||
limm t1, imm, dataSize=8
|
||||
mfence
|
||||
st reg, intseg, [1, t1, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
|
||||
def macroop OUT_R_R {
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
mfence
|
||||
st regm, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
};
|
||||
'''
|
||||
|
||||
@@ -45,9 +45,11 @@ def macroop INS_M_R {
|
||||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
st t6, es, [1, t0, rdi]
|
||||
mfence
|
||||
|
||||
add rdi, rdi, t3, dataSize=asz
|
||||
};
|
||||
@@ -63,6 +65,7 @@ def macroop INS_E_M_R {
|
||||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
topOfLoop:
|
||||
ld t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
@@ -72,6 +75,7 @@ topOfLoop:
|
||||
add rdi, rdi, t3, dataSize=asz
|
||||
br label("topOfLoop"), flags=(nCEZF,)
|
||||
end:
|
||||
mfence
|
||||
fault "NoFault"
|
||||
};
|
||||
|
||||
@@ -84,9 +88,11 @@ def macroop OUTS_R_M {
|
||||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
ld t6, ds, [1, t0, rsi]
|
||||
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
nonSpec=True
|
||||
mfence
|
||||
|
||||
add rsi, rsi, t3, dataSize=asz
|
||||
};
|
||||
@@ -102,6 +108,7 @@ def macroop OUTS_E_R_M {
|
||||
|
||||
zexti t2, reg, 15, dataSize=8
|
||||
|
||||
mfence
|
||||
topOfLoop:
|
||||
ld t6, ds, [1, t0, rsi]
|
||||
st t6, intseg, [1, t2, t0], "IntAddrPrefixIO << 3", addressSize=8, \
|
||||
@@ -111,6 +118,7 @@ topOfLoop:
|
||||
add rsi, rsi, t3, dataSize=asz
|
||||
br label("topOfLoop"), flags=(nCEZF,)
|
||||
end:
|
||||
mfence
|
||||
fault "NoFault"
|
||||
};
|
||||
'''
|
||||
|
||||
@@ -30,6 +30,12 @@
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/shared_region.h>
|
||||
#include <mach/task.h>
|
||||
#endif
|
||||
|
||||
#include <cctype>
|
||||
#include <cerrno>
|
||||
#include <cmath>
|
||||
@@ -82,7 +88,31 @@ procInfo(const char *filename, const char *target)
|
||||
}
|
||||
|
||||
if (fp)
|
||||
fclose(fp);
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
memUsage()
|
||||
{
|
||||
// For the Mach-based Darwin kernel, use the task_info of the self task
|
||||
#ifdef __APPLE__
|
||||
struct task_basic_info t_info;
|
||||
mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
|
||||
|
||||
if (KERN_SUCCESS != task_info(mach_task_self(),
|
||||
TASK_BASIC_INFO, (task_info_t)&t_info,
|
||||
&t_info_count)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Mimic Darwin's implementation of top and subtract
|
||||
// SHARED_REGION_SIZE from the tasks virtual size to account for the
|
||||
// shared memory submap that is incorporated into every process.
|
||||
return (t_info.virtual_size - SHARED_REGION_SIZE) / 1024;
|
||||
#else
|
||||
// Linux implementation
|
||||
return procInfo("/proc/self/status", "VmSize:");
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -39,7 +39,11 @@ std::string &hostname();
|
||||
|
||||
uint64_t procInfo(const char *filename, const char *target);
|
||||
|
||||
inline uint64_t memUsage()
|
||||
{ return procInfo("/proc/self/status", "VmSize:"); }
|
||||
/**
|
||||
* Determine the simulator process' total virtual memory usage.
|
||||
*
|
||||
* @return virtual memory usage in kilobytes
|
||||
*/
|
||||
uint64_t memUsage();
|
||||
|
||||
#endif // __HOSTINFO_HH__
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
* Ali Saidi
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
#include "base/fenv.hh"
|
||||
#include "base/intmath.hh"
|
||||
#include "base/misc.hh"
|
||||
@@ -67,7 +68,10 @@ Random::genrand(uint32_t max)
|
||||
{
|
||||
if (max == 0)
|
||||
return 0;
|
||||
int log = ceilLog2(max) + 1;
|
||||
if (max == std::numeric_limits<uint32_t>::max())
|
||||
return genrand();
|
||||
|
||||
int log = ceilLog2(max + 1);
|
||||
int shift = (sizeof(uint32_t) * 8 - log);
|
||||
uint32_t random;
|
||||
|
||||
@@ -83,7 +87,10 @@ Random::genrand(uint64_t max)
|
||||
{
|
||||
if (max == 0)
|
||||
return 0;
|
||||
int log = ceilLog2(max) + 1;
|
||||
if (max == std::numeric_limits<uint64_t>::max())
|
||||
return genrand();
|
||||
|
||||
int log = ceilLog2(max + 1);
|
||||
int shift = (sizeof(uint64_t) * 8 - log);
|
||||
uint64_t random;
|
||||
|
||||
|
||||
@@ -1477,6 +1477,8 @@ class HistStor
|
||||
|
||||
/** The current sum. */
|
||||
Counter sum;
|
||||
/** The sum of logarithm of each sample, used to compute geometric mean. */
|
||||
Counter logs;
|
||||
/** The sum of squares. */
|
||||
Counter squares;
|
||||
/** The number of samples. */
|
||||
@@ -1528,6 +1530,7 @@ class HistStor
|
||||
|
||||
sum += val * number;
|
||||
squares += val * val * number;
|
||||
logs += log(val) * number;
|
||||
samples += number;
|
||||
}
|
||||
|
||||
@@ -1567,6 +1570,7 @@ class HistStor
|
||||
data.cvec[i] = cvec[i];
|
||||
|
||||
data.sum = sum;
|
||||
data.logs = logs;
|
||||
data.squares = squares;
|
||||
data.samples = samples;
|
||||
}
|
||||
@@ -1589,6 +1593,7 @@ class HistStor
|
||||
sum = Counter();
|
||||
squares = Counter();
|
||||
samples = Counter();
|
||||
logs = Counter();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -183,6 +183,7 @@ struct DistData
|
||||
VCounter cvec;
|
||||
Counter sum;
|
||||
Counter squares;
|
||||
Counter logs;
|
||||
Counter samples;
|
||||
};
|
||||
|
||||
|
||||
@@ -367,6 +367,12 @@ DistPrint::operator()(ostream &stream) const
|
||||
print.value = data.samples ? data.sum / data.samples : NAN;
|
||||
print(stream);
|
||||
|
||||
if (data.type == Hist) {
|
||||
print.name = base + "gmean";
|
||||
print.value = data.samples ? exp(data.logs / data.samples) : NAN;
|
||||
print(stream);
|
||||
}
|
||||
|
||||
Result stdev = NAN;
|
||||
if (data.samples)
|
||||
stdev = sqrt((data.samples * data.squares - data.sum * data.sum) /
|
||||
@@ -507,7 +513,14 @@ Text::visit(const Vector2dInfo &info)
|
||||
bool havesub = false;
|
||||
VectorPrint print;
|
||||
|
||||
print.subnames = info.y_subnames;
|
||||
if (!info.y_subnames.empty()) {
|
||||
for (off_type i = 0; i < info.y; ++i) {
|
||||
if (!info.y_subnames[i].empty()) {
|
||||
print.subnames = info.y_subnames;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
print.flags = info.flags;
|
||||
print.separatorString = info.separatorString;
|
||||
print.descriptions = descriptions;
|
||||
|
||||
@@ -284,17 +284,16 @@ class BaseCPU : public MemObject
|
||||
void enableFunctionTrace();
|
||||
void traceFunctionsInternal(Addr pc);
|
||||
|
||||
protected:
|
||||
private:
|
||||
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
|
||||
|
||||
public:
|
||||
void traceFunctions(Addr pc)
|
||||
{
|
||||
if (functionTracingEnabled)
|
||||
traceFunctionsInternal(pc);
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<BaseCPU *> cpuList; //!< Static global cpu list
|
||||
|
||||
public:
|
||||
static int numSimulatedCPUs() { return cpuList.size(); }
|
||||
static Counter numSimulatedInstructions()
|
||||
{
|
||||
|
||||
@@ -299,6 +299,7 @@ MultDivUnit::exeMulDiv(int slot_num)
|
||||
}
|
||||
|
||||
mult_div_req->setProcessing(false);
|
||||
cpu->wakeCPU();
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "config/use_checker.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/thread_state.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/exetrace.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/Activity.hh"
|
||||
@@ -987,6 +988,8 @@ DefaultCommit<Impl>::commitInsts()
|
||||
// Updates misc. registers.
|
||||
head_inst->updateMiscRegs();
|
||||
|
||||
cpu->traceFunctions(pc[tid].instAddr());
|
||||
|
||||
TheISA::advancePC(pc[tid], head_inst->staticInst);
|
||||
|
||||
// Keep track of the last sequence number commited
|
||||
|
||||
@@ -446,10 +446,6 @@ void
|
||||
DefaultDecode<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_fetch = fromFetch->size;
|
||||
#ifdef DEBUG
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++)
|
||||
assert(insts[tid].empty());
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_fetch; ++i) {
|
||||
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
|
||||
}
|
||||
|
||||
@@ -1340,10 +1340,10 @@ DefaultIEW<Impl>::executeInsts()
|
||||
fetchRedirect[tid] = true;
|
||||
|
||||
DPRINTF(IEW, "Execute: Branch mispredict detected.\n");
|
||||
DPRINTF(IEW, "Predicted target was PC:%#x, NPC:%#x.\n",
|
||||
inst->predInstAddr(), inst->predNextInstAddr());
|
||||
DPRINTF(IEW, "Predicted target was PC: %s.\n",
|
||||
inst->readPredTarg());
|
||||
DPRINTF(IEW, "Execute: Redirecting fetch to PC: %s.\n",
|
||||
inst->pcState(), inst->nextInstAddr());
|
||||
inst->pcState());
|
||||
// If incorrect, then signal the ROB that it must be squashed.
|
||||
squashDueToBranch(inst, tid);
|
||||
|
||||
|
||||
@@ -766,10 +766,6 @@ void
|
||||
DefaultRename<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_decode = fromDecode->size;
|
||||
#ifdef DEBUG
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++)
|
||||
assert(insts[tid].empty());
|
||||
#endif
|
||||
for (int i = 0; i < insts_from_decode; ++i) {
|
||||
DynInstPtr inst = fromDecode->insts[i];
|
||||
insts[inst->threadNumber].push_back(inst);
|
||||
|
||||
@@ -83,7 +83,9 @@ PCEventQueue::schedule(PCEvent *event)
|
||||
bool
|
||||
PCEventQueue::doService(ThreadContext *tc)
|
||||
{
|
||||
Addr pc = tc->instAddr() & ~0x3;
|
||||
// This will fail to break on Alpha PALcode addresses, but that is
|
||||
// a rare use case.
|
||||
Addr pc = tc->instAddr();
|
||||
int serviced = 0;
|
||||
range_t range = equal_range(pc);
|
||||
for (iterator i = range.first; i != range.second; ++i) {
|
||||
@@ -91,7 +93,7 @@ PCEventQueue::doService(ThreadContext *tc)
|
||||
// another event. This for example, prevents two invocations
|
||||
// of the SkipFuncEvent. Maybe we should have separate PC
|
||||
// event queues for each processor?
|
||||
if (pc != (tc->instAddr() & ~0x3))
|
||||
if (pc != tc->instAddr())
|
||||
continue;
|
||||
|
||||
DPRINTF(PCEvent, "PC based event serviced at %#x: %s\n",
|
||||
|
||||
@@ -43,7 +43,7 @@ class DirectedGenerator : public SimObject
|
||||
virtual ~DirectedGenerator() {}
|
||||
|
||||
virtual bool initiate() = 0;
|
||||
virtual void performCallback(uint proc, Addr address) = 0;
|
||||
virtual void performCallback(uint32_t proc, Addr address) = 0;
|
||||
|
||||
void setDirectedTester(RubyDirectedTester* directed_tester);
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ InvalidateGenerator::initiate()
|
||||
}
|
||||
|
||||
void
|
||||
InvalidateGenerator::performCallback(uint proc, Addr address)
|
||||
InvalidateGenerator::performCallback(uint32_t proc, Addr address)
|
||||
{
|
||||
assert(m_address == address);
|
||||
|
||||
|
||||
@@ -49,14 +49,14 @@ class InvalidateGenerator : public DirectedGenerator
|
||||
~InvalidateGenerator();
|
||||
|
||||
bool initiate();
|
||||
void performCallback(uint proc, Addr address);
|
||||
void performCallback(uint32_t proc, Addr address);
|
||||
|
||||
private:
|
||||
InvalidateGeneratorStatus m_status;
|
||||
Addr m_address;
|
||||
uint m_active_read_node;
|
||||
uint m_active_inv_node;
|
||||
uint m_addr_increment_size;
|
||||
uint32_t m_active_read_node;
|
||||
uint32_t m_active_inv_node;
|
||||
uint32_t m_addr_increment_size;
|
||||
};
|
||||
|
||||
#endif //__CPU_DIRECTEDTEST_INVALIDATEGENERATOR_HH__
|
||||
|
||||
@@ -53,11 +53,11 @@ class RubyDirectedTester : public MemObject
|
||||
RubyDirectedTester *tester;
|
||||
|
||||
public:
|
||||
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint _idx)
|
||||
CpuPort(const std::string &_name, RubyDirectedTester *_tester, uint32_t _idx)
|
||||
: SimpleTimingPort(_name, _tester), tester(_tester), idx(_idx)
|
||||
{}
|
||||
|
||||
uint idx;
|
||||
uint32_t idx;
|
||||
|
||||
protected:
|
||||
virtual bool recvTiming(PacketPtr pkt);
|
||||
|
||||
@@ -89,7 +89,7 @@ SeriesRequestGenerator::initiate()
|
||||
}
|
||||
|
||||
void
|
||||
SeriesRequestGenerator::performCallback(uint proc, Addr address)
|
||||
SeriesRequestGenerator::performCallback(uint32_t proc, Addr address)
|
||||
{
|
||||
assert(m_active_node == proc);
|
||||
assert(m_address == address);
|
||||
|
||||
@@ -49,13 +49,13 @@ class SeriesRequestGenerator : public DirectedGenerator
|
||||
~SeriesRequestGenerator();
|
||||
|
||||
bool initiate();
|
||||
void performCallback(uint proc, Addr address);
|
||||
void performCallback(uint32_t proc, Addr address);
|
||||
|
||||
private:
|
||||
SeriesRequestGeneratorStatus m_status;
|
||||
Addr m_address;
|
||||
uint m_active_node;
|
||||
uint m_addr_increment_size;
|
||||
uint32_t m_active_node;
|
||||
uint32_t m_addr_increment_size;
|
||||
bool m_issue_writes;
|
||||
};
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ Pl111::Pl111(const Params *p)
|
||||
{
|
||||
pioSize = 0xFFFF;
|
||||
|
||||
pic = simout.create("framebuffer.bmp", true);
|
||||
pic = simout.create(csprintf("%s.framebuffer.bmp", sys->name()), true);
|
||||
|
||||
dmaBuffer = new uint8_t[LcdMaxWidth * LcdMaxHeight * sizeof(uint32_t)];
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ void
|
||||
PioDevice::init()
|
||||
{
|
||||
if (!pioPort)
|
||||
panic("Pio port %s not connected to anything!", name());
|
||||
panic("Pio port of %s not connected to anything!", name());
|
||||
pioPort->sendStatusChange(Port::RangeChange);
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ DebugFlag('MemoryAccess')
|
||||
|
||||
DebugFlag('ProtocolTrace')
|
||||
DebugFlag('RubyCache')
|
||||
DebugFlag('RubyCacheTrace')
|
||||
DebugFlag('RubyDma')
|
||||
DebugFlag('RubyGenerated')
|
||||
DebugFlag('RubyMemory')
|
||||
@@ -67,9 +68,9 @@ DebugFlag('RubyPort')
|
||||
DebugFlag('RubyQueue')
|
||||
DebugFlag('RubySequencer')
|
||||
DebugFlag('RubySlicc')
|
||||
DebugFlag('RubyStorebuffer')
|
||||
DebugFlag('RubySystem')
|
||||
DebugFlag('RubyTester')
|
||||
|
||||
CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
|
||||
'RubyGenerated', 'RubySlicc', 'RubyStorebuffer', 'RubyCache',
|
||||
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer'])
|
||||
'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
|
||||
'RubyMemory', 'RubyDma', 'RubyPort', 'RubySequencer', 'RubyCacheTrace'])
|
||||
|
||||
@@ -447,13 +447,6 @@ Bus::recvAtomic(PacketPtr pkt)
|
||||
void
|
||||
Bus::recvFunctional(PacketPtr pkt)
|
||||
{
|
||||
if (!pkt->isPrint()) {
|
||||
// don't do DPRINTFs on PrintReq as it clutters up the output
|
||||
DPRINTF(Bus,
|
||||
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
|
||||
pkt->getSrc(), pkt->getDest(), pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
}
|
||||
assert(pkt->getDest() == Packet::Broadcast);
|
||||
|
||||
int port_id = findPort(pkt->getAddr());
|
||||
@@ -462,6 +455,14 @@ Bus::recvFunctional(PacketPtr pkt)
|
||||
// id after each
|
||||
int src_id = pkt->getSrc();
|
||||
|
||||
if (!pkt->isPrint()) {
|
||||
// don't do DPRINTFs on PrintReq as it clutters up the output
|
||||
DPRINTF(Bus,
|
||||
"recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
|
||||
src_id, port_id, pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
}
|
||||
|
||||
assert(pkt->isRequest()); // hasn't already been satisfied
|
||||
|
||||
SnoopIter s_end = snoopPorts.end();
|
||||
|
||||
@@ -1,4 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
@@ -192,14 +204,98 @@ Packet::checkFunctional(Printable *obj, Addr addr, int size, uint8_t *data)
|
||||
memcpy(getPtr<uint8_t>(), data + offset, getSize());
|
||||
return true;
|
||||
} else {
|
||||
// In this case the timing packet only partially satisfies
|
||||
// the request, so we would need more information to make
|
||||
// this work. Like bytes valid in the packet or
|
||||
// something, so the request could continue and get this
|
||||
// bit of possibly newer data along with the older data
|
||||
// not written to yet.
|
||||
panic("Memory value only partially satisfies the functional "
|
||||
"request. Now what?");
|
||||
// Offsets and sizes to copy in case of partial overlap
|
||||
int func_offset;
|
||||
int val_offset;
|
||||
int overlap_size;
|
||||
|
||||
// calculate offsets and copy sizes for the two byte arrays
|
||||
if (val_start < func_start && val_end <= func_end) {
|
||||
val_offset = func_start - val_start;
|
||||
func_offset = 0;
|
||||
overlap_size = val_end - func_start;
|
||||
} else if (val_start >= func_start && val_end > func_end) {
|
||||
val_offset = 0;
|
||||
func_offset = val_start - func_start;
|
||||
overlap_size = func_end - val_start;
|
||||
} else if (val_start >= func_start && val_end <= func_end) {
|
||||
val_offset = 0;
|
||||
func_offset = val_start - func_start;
|
||||
overlap_size = size;
|
||||
} else {
|
||||
panic("BUG: Missed a case for a partial functional request");
|
||||
}
|
||||
|
||||
// Figure out how much of the partial overlap should be copied
|
||||
// into the packet and not overwrite previously found bytes.
|
||||
if (bytesValidStart == 0 && bytesValidEnd == 0) {
|
||||
// No bytes have been copied yet, just set indices
|
||||
// to found range
|
||||
bytesValidStart = func_offset;
|
||||
bytesValidEnd = func_offset + overlap_size;
|
||||
} else {
|
||||
// Some bytes have already been copied. Use bytesValid
|
||||
// indices and offset values to figure out how much data
|
||||
// to copy and where to copy it to.
|
||||
|
||||
// Indice overlap conditions to check
|
||||
int a = func_offset - bytesValidStart;
|
||||
int b = (func_offset + overlap_size) - bytesValidEnd;
|
||||
int c = func_offset - bytesValidEnd;
|
||||
int d = (func_offset + overlap_size) - bytesValidStart;
|
||||
|
||||
if (a >= 0 && b <= 0) {
|
||||
// bytes already in pkt data array are superset of
|
||||
// found bytes, will not copy any bytes
|
||||
overlap_size = 0;
|
||||
} else if (a < 0 && d >= 0 && b <= 0) {
|
||||
// found bytes will move bytesValidStart towards 0
|
||||
overlap_size = bytesValidStart - func_offset;
|
||||
bytesValidStart = func_offset;
|
||||
} else if (b > 0 && c <= 0 && a >= 0) {
|
||||
// found bytes will move bytesValidEnd
|
||||
// towards end of pkt data array
|
||||
overlap_size =
|
||||
(func_offset + overlap_size) - bytesValidEnd;
|
||||
val_offset += bytesValidEnd - func_offset;
|
||||
func_offset = bytesValidEnd;
|
||||
bytesValidEnd += overlap_size;
|
||||
} else if (a < 0 && b > 0) {
|
||||
// Found bytes are superset of copied range. Will move
|
||||
// bytesValidStart towards 0 and bytesValidEnd towards
|
||||
// end of pkt data array. Need to break copy into two
|
||||
// pieces so as to not overwrite previously found data.
|
||||
|
||||
// copy the first half
|
||||
uint8_t *dest = getPtr<uint8_t>() + func_offset;
|
||||
uint8_t *src = data + val_offset;
|
||||
memcpy(dest, src, (bytesValidStart - func_offset));
|
||||
|
||||
// re-calc the offsets and indices to do the copy
|
||||
// required for the second half
|
||||
val_offset += (bytesValidEnd - func_offset);
|
||||
bytesValidStart = func_offset;
|
||||
overlap_size =
|
||||
(func_offset + overlap_size) - bytesValidEnd;
|
||||
func_offset = bytesValidEnd;
|
||||
bytesValidEnd += overlap_size;
|
||||
} else if ((c > 0 && b > 0)
|
||||
|| (a < 0 && d < 0)) {
|
||||
// region to be copied is discontiguous! Not supported.
|
||||
panic("BUG: Discontiguous bytes found"
|
||||
"for functional copying!");
|
||||
}
|
||||
}
|
||||
assert(bytesValidEnd <= getSize());
|
||||
|
||||
// copy partial data into the packet's data array
|
||||
uint8_t *dest = getPtr<uint8_t>() + func_offset;
|
||||
uint8_t *src = data + val_offset;
|
||||
memcpy(dest, src, overlap_size);
|
||||
|
||||
// check if we're done filling the functional access
|
||||
bool done = (bytesValidStart == 0) && (bytesValidEnd == getSize());
|
||||
return done;
|
||||
}
|
||||
} else if (isWrite()) {
|
||||
if (offset >= 0) {
|
||||
|
||||
@@ -299,6 +299,13 @@ class Packet : public FastAlloc, public Printable
|
||||
*/
|
||||
MemCmd origCmd;
|
||||
|
||||
/**
|
||||
* These values specify the range of bytes found that satisfy a
|
||||
* functional read.
|
||||
*/
|
||||
uint16_t bytesValidStart;
|
||||
uint16_t bytesValidEnd;
|
||||
|
||||
public:
|
||||
/// Used to calculate latencies for each packet.
|
||||
Tick time;
|
||||
@@ -507,7 +514,8 @@ class Packet : public FastAlloc, public Printable
|
||||
*/
|
||||
Packet(Request *_req, MemCmd _cmd, NodeID _dest)
|
||||
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
|
||||
dest(_dest), time(curTick()), senderState(NULL)
|
||||
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
|
||||
time(curTick()), senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
addr = req->getPaddr();
|
||||
@@ -526,7 +534,8 @@ class Packet : public FastAlloc, public Printable
|
||||
*/
|
||||
Packet(Request *_req, MemCmd _cmd, NodeID _dest, int _blkSize)
|
||||
: flags(VALID_DST), cmd(_cmd), req(_req), data(NULL),
|
||||
dest(_dest), time(curTick()), senderState(NULL)
|
||||
dest(_dest), bytesValidStart(0), bytesValidEnd(0),
|
||||
time(curTick()), senderState(NULL)
|
||||
{
|
||||
if (req->hasPaddr()) {
|
||||
addr = req->getPaddr() & ~(_blkSize - 1);
|
||||
@@ -547,6 +556,7 @@ class Packet : public FastAlloc, public Printable
|
||||
: cmd(pkt->cmd), req(pkt->req),
|
||||
data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
|
||||
addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
|
||||
bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
|
||||
time(curTick()), senderState(pkt->senderState)
|
||||
{
|
||||
if (!clearFlags)
|
||||
@@ -554,6 +564,7 @@ class Packet : public FastAlloc, public Printable
|
||||
|
||||
flags.set(pkt->flags & (VALID_ADDR|VALID_SIZE|VALID_SRC|VALID_DST));
|
||||
flags.set(pkt->flags & STATIC_DATA);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1285,7 +1285,6 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||
vv_allocateL2CacheBlock;
|
||||
hp_copyFromTBEToL2;
|
||||
s_deallocateTBE;
|
||||
ka_wakeUpAllDependents;
|
||||
}
|
||||
|
||||
transition(I, Trigger_L2_to_L1D, IT) {
|
||||
@@ -1566,7 +1565,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
|
||||
k_popMandatoryQueue;
|
||||
}
|
||||
|
||||
transition({MM, M, MMR}, Flush_line, MM_F) {
|
||||
transition({MM, M, MMR, MR}, Flush_line, MM_F) {
|
||||
i_allocateTBE;
|
||||
bf_issueGETF;
|
||||
p_decrementNumberOfMessagesByOne;
|
||||
|
||||
@@ -198,7 +198,11 @@ MessageBuffer::enqueue(MsgPtr message, Time delta)
|
||||
m_last_arrival_time * g_eventQueue_ptr->getClock());
|
||||
}
|
||||
}
|
||||
m_last_arrival_time = arrival_time;
|
||||
|
||||
// If running a cache trace, don't worry about the last arrival checks
|
||||
if (!g_system_ptr->m_warmup_enabled) {
|
||||
m_last_arrival_time = arrival_time;
|
||||
}
|
||||
|
||||
// compute the delay cycles and set enqueue time
|
||||
Message* msg_ptr = message.get();
|
||||
|
||||
@@ -58,7 +58,6 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "config/no_vector_bounds_checks.hh"
|
||||
#include "mem/ruby/common/TypeDefines.hh"
|
||||
#include "sim/eventq.hh"
|
||||
|
||||
@@ -77,9 +76,6 @@ class RubyEventQueue : public EventManager
|
||||
void scheduleEventAbsolute(Consumer* consumer, Time timeAbs);
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
void triggerEvents(Time t) { assert(0); }
|
||||
void triggerAllEvents() { assert(0); }
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
RubyEventQueue(const RubyEventQueue& obj);
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,43 +27,154 @@
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "debug/RubyCacheTrace.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "gzstream.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
CacheRecorder::addRecord(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time)
|
||||
TraceRecord::print(ostream& out) const
|
||||
{
|
||||
TraceRecord rec(sequencer, data_addr, pc_addr, type, time);
|
||||
m_records.push_back(rec);
|
||||
out << "[TraceRecord: Node, " << m_cntrl_id << ", "
|
||||
<< m_data_address << ", " << m_pc_address << ", "
|
||||
<< m_type << ", Time: " << m_time << "]";
|
||||
}
|
||||
|
||||
int
|
||||
CacheRecorder::dumpRecords(string filename)
|
||||
CacheRecorder::CacheRecorder()
|
||||
: m_uncompressed_trace(NULL),
|
||||
m_uncompressed_trace_size(0)
|
||||
{
|
||||
ogzstream out(filename.c_str());
|
||||
if (out.fail()) {
|
||||
cout << "Error: error opening file '" << filename << "'" << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace,
|
||||
uint64_t uncompressed_trace_size,
|
||||
std::vector<Sequencer*>& seq_map)
|
||||
: m_uncompressed_trace(uncompressed_trace),
|
||||
m_uncompressed_trace_size(uncompressed_trace_size),
|
||||
m_seq_map(seq_map), m_bytes_read(0), m_records_read(0),
|
||||
m_records_flushed(0)
|
||||
{
|
||||
}
|
||||
|
||||
CacheRecorder::~CacheRecorder()
|
||||
{
|
||||
if (m_uncompressed_trace != NULL) {
|
||||
delete m_uncompressed_trace;
|
||||
m_uncompressed_trace = NULL;
|
||||
}
|
||||
|
||||
std::sort(m_records.begin(), m_records.end(), greater<TraceRecord>());
|
||||
|
||||
int size = m_records.size();
|
||||
for (int i = 0; i < size; ++i)
|
||||
m_records[i].output(out);
|
||||
|
||||
m_records.clear();
|
||||
|
||||
return size;
|
||||
m_seq_map.clear();
|
||||
}
|
||||
|
||||
void
|
||||
CacheRecorder::print(ostream& out) const
|
||||
CacheRecorder::enqueueNextFlushRequest()
|
||||
{
|
||||
if (m_records_flushed < m_records.size()) {
|
||||
TraceRecord* rec = m_records[m_records_flushed];
|
||||
m_records_flushed++;
|
||||
Request* req = new Request(rec->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),0);
|
||||
MemCmd::Command requestType = MemCmd::FlushReq;
|
||||
Packet *pkt = new Packet(req, requestType, -1);
|
||||
|
||||
Sequencer* m_sequencer_ptr = m_seq_map[rec->m_cntrl_id];
|
||||
assert(m_sequencer_ptr != NULL);
|
||||
m_sequencer_ptr->makeRequest(pkt);
|
||||
|
||||
DPRINTF(RubyCacheTrace, "Flushing %s\n", *rec);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CacheRecorder::enqueueNextFetchRequest()
|
||||
{
|
||||
if (m_bytes_read < m_uncompressed_trace_size) {
|
||||
TraceRecord* traceRecord = (TraceRecord*) (m_uncompressed_trace +
|
||||
m_bytes_read);
|
||||
|
||||
DPRINTF(RubyCacheTrace, "Issuing %s\n", *traceRecord);
|
||||
Request* req = new Request();
|
||||
MemCmd::Command requestType;
|
||||
|
||||
if (traceRecord->m_type == RubyRequestType_LD) {
|
||||
requestType = MemCmd::ReadReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),0);
|
||||
} else if (traceRecord->m_type == RubyRequestType_IFETCH) {
|
||||
requestType = MemCmd::ReadReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),
|
||||
Request::INST_FETCH);
|
||||
} else {
|
||||
requestType = MemCmd::WriteReq;
|
||||
req->setPhys(traceRecord->m_data_address,
|
||||
RubySystem::getBlockSizeBytes(),0);
|
||||
}
|
||||
|
||||
Packet *pkt = new Packet(req, requestType, -1);
|
||||
pkt->dataStatic(traceRecord->m_data);
|
||||
|
||||
Sequencer* m_sequencer_ptr = m_seq_map[traceRecord->m_cntrl_id];
|
||||
assert(m_sequencer_ptr != NULL);
|
||||
m_sequencer_ptr->makeRequest(pkt);
|
||||
|
||||
m_bytes_read += (sizeof(TraceRecord) +
|
||||
RubySystem::getBlockSizeBytes());
|
||||
m_records_read++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CacheRecorder::addRecord(int cntrl, const physical_address_t data_addr,
|
||||
const physical_address_t pc_addr,
|
||||
RubyRequestType type, Time time, DataBlock& data)
|
||||
{
|
||||
TraceRecord* rec = (TraceRecord*)malloc(sizeof(TraceRecord) +
|
||||
RubySystem::getBlockSizeBytes());
|
||||
rec->m_cntrl_id = cntrl;
|
||||
rec->m_time = time;
|
||||
rec->m_data_address = data_addr;
|
||||
rec->m_pc_address = pc_addr;
|
||||
rec->m_type = type;
|
||||
memcpy(rec->m_data, data.getData(0, RubySystem::getBlockSizeBytes()),
|
||||
RubySystem::getBlockSizeBytes());
|
||||
|
||||
m_records.push_back(rec);
|
||||
}
|
||||
|
||||
/*
 * Serialize all recorded entries into one contiguous buffer.
 *
 * The records are sorted with compareTraceRecords and copied back to back
 * (header plus one cache block each) into *buf. Each record is freed once
 * copied, and the record list is cleared at the end.
 *
 *   buf         in/out: buffer allocated with new[]; replaced by a larger
 *               new[] buffer (the old one is deleted) when it is too small
 *   total_size  capacity of *buf in bytes on entry
 *
 * Returns the number of bytes written into *buf.
 *
 * NOTE: total_size is passed by value, so a caller cannot observe the
 * grown capacity; only the returned byte count is reliable afterwards.
 */
uint64
CacheRecorder::aggregateRecords(uint8_t** buf, uint64 total_size)
{
    std::sort(m_records.begin(), m_records.end(), compareTraceRecords);

    int size = m_records.size();
    uint64 current_size = 0;
    int record_size = sizeof(TraceRecord) + RubySystem::getBlockSizeBytes();

    for (int i = 0; i < size; ++i) {
        // Determine if we need to expand the buffer size
        if (current_size + record_size > total_size) {
            // Keep doubling until the next record fits. One doubling is
            // not enough when the capacity is 0 or below half a record.
            uint64 new_size = (total_size == 0) ? (uint64)record_size
                                                : total_size;
            while (current_size + record_size > new_size) {
                new_size *= 2;
            }

            uint8_t* new_buf = new (nothrow) uint8_t[new_size];
            if (new_buf == NULL) {
                fatal("Unable to allocate buffer of size %s\n",
                      new_size);
            }
            total_size = new_size;

            uint8_t* old_buf = *buf;
            if (old_buf != NULL && current_size > 0) {
                memcpy(new_buf, old_buf, current_size);
            }
            *buf = new_buf;
            delete [] old_buf;
        }

        // Copy the current record into the buffer
        memcpy(&((*buf)[current_size]), m_records[i], record_size);
        current_size += record_size;

        free(m_records[i]);
        m_records[i] = NULL;
    }

    m_records.clear();
    return current_size;
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 2010 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,37 +35,90 @@
|
||||
#ifndef __MEM_RUBY_RECORDER_CACHERECORDER_HH__
|
||||
#define __MEM_RUBY_RECORDER_CACHERECORDER_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/common/TypeDefines.hh"
|
||||
|
||||
class Address;
|
||||
class TraceRecord;
|
||||
class Sequencer;
|
||||
|
||||
/*!
|
||||
* Class for recording cache contents. Note that the last element of the
|
||||
* class is an array of length zero. It is used for creating a variable
|
||||
* length object, so that while writing the data to a file one does not
|
||||
* need to copy the meta data and the actual data separately.
|
||||
*/
|
||||
// Fixed-size header of one recorded cache block. The block's bytes live
// directly behind the header in the same allocation (see m_data).
class TraceRecord {
|
||||
public:
|
||||
// Index used to look up the sequencer that replays this record.
int m_cntrl_id;
|
||||
// Timestamp; compareTraceRecords orders records by this field.
Time m_time;
|
||||
// Physical address of the recorded block; used for flush/fetch requests.
physical_address_t m_data_address;
|
||||
// Physical address of the pc stored with the record.
physical_address_t m_pc_address;
|
||||
// Request type; LD and IFETCH are replayed as reads, the rest as writes.
RubyRequestType m_type;
|
||||
// Zero-length trailing array: records are allocated with one cache block
// of extra space after the header and the block data is copied here.
uint8_t m_data[0];
|
||||
|
||||
// Write a human-readable description of the header fields to out.
void print(std::ostream& out) const;
|
||||
};
|
||||
|
||||
class CacheRecorder
|
||||
{
|
||||
public:
|
||||
void addRecord(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
int dumpRecords(std::string filename);
|
||||
CacheRecorder();
|
||||
~CacheRecorder();
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
CacheRecorder(uint8_t* uncompressed_trace,
|
||||
uint64_t uncompressed_trace_size,
|
||||
std::vector<Sequencer*>& SequencerMap);
|
||||
void addRecord(int cntrl, const physical_address_t data_addr,
|
||||
const physical_address_t pc_addr, RubyRequestType type,
|
||||
Time time, DataBlock& data);
|
||||
|
||||
uint64 aggregateRecords(uint8_t** data, uint64 size);
|
||||
|
||||
/*!
|
||||
* Function for flushing the memory contents of the caches to the
|
||||
* main memory. It goes through the recorded contents of the caches,
|
||||
* and issues flush requests. Except for the first one, a flush request
|
||||
* is issued only after the previous one has completed. This currently
|
||||
* requires use of MOESI Hammer protocol since only that protocol
|
||||
* supports flush requests.
|
||||
*/
|
||||
void enqueueNextFlushRequest();
|
||||
|
||||
/*!
|
||||
* Function for warming up the memory and the caches. It goes
|
||||
* through the recorded contents of the caches, as available in the
|
||||
* checkpoint and issues fetch requests. Except for the first one, a
|
||||
* fetch request is issued only after the previous one has completed.
|
||||
* It should be possible to use this with any protocol.
|
||||
*/
|
||||
void enqueueNextFetchRequest();
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
CacheRecorder(const CacheRecorder& obj);
|
||||
CacheRecorder& operator=(const CacheRecorder& obj);
|
||||
|
||||
std::vector<TraceRecord> m_records;
|
||||
std::vector<TraceRecord*> m_records;
|
||||
uint8_t* m_uncompressed_trace;
|
||||
uint64_t m_uncompressed_trace_size;
|
||||
std::vector<Sequencer*> m_seq_map;
|
||||
uint64_t m_bytes_read;
|
||||
uint64_t m_records_read;
|
||||
uint64_t m_records_flushed;
|
||||
};
|
||||
|
||||
inline bool
|
||||
compareTraceRecords(const TraceRecord* n1, const TraceRecord* n2)
|
||||
{
|
||||
return n1->m_time > n2->m_time;
|
||||
}
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const CacheRecorder& obj)
|
||||
operator<<(std::ostream& out, const TraceRecord& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
|
||||
@@ -33,8 +33,4 @@ Import('*')
|
||||
if env['PROTOCOL'] == 'None':
|
||||
Return()
|
||||
|
||||
SimObject('Tracer.py')
|
||||
|
||||
Source('CacheRecorder.cc')
|
||||
Source('Tracer.cc')
|
||||
Source('TraceRecord.cc', Werror=False)
|
||||
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "mem/protocol/RubyRequest.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Build a record for one request seen by _sequencer. Load-linked and
// store-conditional requests are stored as plain stores, so the trace does
// not differentiate between store misses and atomic requests.
TraceRecord::TraceRecord(Sequencer* _sequencer, const Address& data_addr,
                         const Address& pc_addr, RubyRequestType type, Time time)
    : m_sequencer_ptr(_sequencer),
      m_time(time),
      m_data_address(data_addr),
      m_pc_address(pc_addr),
      m_type(type)
{
    const bool is_atomic = (type == RubyRequestType_Load_Linked) ||
                           (type == RubyRequestType_Store_Conditional);
    if (is_atomic) {
        m_type = RubyRequestType_ST;
    }
}
|
||||
|
||||
// Copy constructor: member-wise copy of every field of obj.
TraceRecord::TraceRecord(const TraceRecord& obj)
    : m_sequencer_ptr(obj.m_sequencer_ptr),
      m_time(obj.m_time),
      m_data_address(obj.m_data_address),
      m_pc_address(obj.m_pc_address),
      m_type(obj.m_type)
{
}
|
||||
|
||||
// Assignment: copies every field from obj. The sequencer pointer is
// copied as a pointer, so both records refer to the same sequencer.
TraceRecord&
TraceRecord::operator=(const TraceRecord& obj)
{
    m_type = obj.m_type;
    m_pc_address = obj.m_pc_address;
    m_data_address = obj.m_data_address;
    m_time = obj.m_time;
    m_sequencer_ptr = obj.m_sequencer_ptr;

    return *this;
}
|
||||
|
||||
// Replay this record: wait for the record's sequencer to become empty,
// hand it a packet for the recorded data address, then wait for the
// sequencer to become empty again. Requires a non-NULL sequencer pointer.
void
|
||||
TraceRecord::issueRequest() const
|
||||
{
|
||||
assert(m_sequencer_ptr != NULL);
|
||||
// NOTE(review): req lives on this function's stack while the heap-allocated
// packet keeps its address, and pkt is never deleted here -- confirm who
// owns the packet and that nothing uses it after this function returns.
// The two trailing 0 arguments are presumably size and flags; verify.
Request req(m_data_address.getAddress(), 0, 0);
|
||||
Packet *pkt = new Packet(&req, MemCmd(MemCmd::InvalidCmd), -1);
|
||||
|
||||
// Clear out the sequencer
// (trigger events in steps of 100 time units until it reports empty)
|
||||
while (!m_sequencer_ptr->empty()) {
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
|
||||
}
|
||||
|
||||
m_sequencer_ptr->makeRequest(pkt);
|
||||
|
||||
// Clear out the sequencer
// (same draining loop, now waiting for the request just issued)
|
||||
while (!m_sequencer_ptr->empty()) {
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 100);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TraceRecord::print(ostream& out) const
|
||||
{
|
||||
out << "[TraceRecord: Node, " << m_sequencer_ptr->name() << ", "
|
||||
<< m_data_address << ", " << m_pc_address << ", "
|
||||
<< m_type << ", Time: " << m_time << "]";
|
||||
}
|
||||
|
||||
void
|
||||
TraceRecord::output(ostream& out) const
|
||||
{
|
||||
out << m_sequencer_ptr->name() << " ";
|
||||
m_data_address.output(out);
|
||||
out << " ";
|
||||
m_pc_address.output(out);
|
||||
out << " ";
|
||||
out << m_type;
|
||||
out << endl;
|
||||
}
|
||||
|
||||
bool
|
||||
TraceRecord::input(istream& in)
|
||||
{
|
||||
string sequencer_name;
|
||||
in >> sequencer_name;
|
||||
|
||||
// The SimObject find function is slow and iterates through the
|
||||
// simObjectList to find the sequencer pointer. Therefore, expect
|
||||
// trace playback to be slow.
|
||||
m_sequencer_ptr = (Sequencer*)SimObject::find(sequencer_name.c_str());
|
||||
|
||||
m_data_address.input(in);
|
||||
m_pc_address.input(in);
|
||||
if (in.eof())
|
||||
return false;
|
||||
|
||||
string type;
|
||||
in >> type;
|
||||
m_type = string_to_RubyRequestType(type);
|
||||
|
||||
// Ignore the rest of the line
|
||||
char c = '\0';
|
||||
while ((!in.eof()) && (c != '\n')) {
|
||||
in.get(c);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* An entry in the cache request record. It is aware of the ruby time
|
||||
* and can issue the request back to the cache.
|
||||
*/
|
||||
|
||||
#ifndef __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
||||
#define __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
|
||||
class CacheMsg;
|
||||
|
||||
class TraceRecord
|
||||
{
|
||||
public:
|
||||
TraceRecord(Sequencer* _sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
|
||||
TraceRecord()
|
||||
{
|
||||
m_sequencer_ptr = NULL;
|
||||
m_time = 0;
|
||||
m_type = RubyRequestType_NULL;
|
||||
}
|
||||
|
||||
TraceRecord(const TraceRecord& obj);
|
||||
TraceRecord& operator=(const TraceRecord& obj);
|
||||
|
||||
void issueRequest() const;
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
void output(std::ostream& out) const;
|
||||
bool input(std::istream& in);
|
||||
|
||||
private:
|
||||
friend bool operator>(const TraceRecord& n1, const TraceRecord& n2);
|
||||
|
||||
Sequencer* m_sequencer_ptr;
|
||||
Time m_time;
|
||||
Address m_data_address;
|
||||
Address m_pc_address;
|
||||
RubyRequestType m_type;
|
||||
};
|
||||
|
||||
inline bool
|
||||
operator>(const TraceRecord& n1, const TraceRecord& n2)
|
||||
{
|
||||
return n1.m_time > n2.m_time;
|
||||
}
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const TraceRecord& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_RECORDER_TRACERECORD_HH__
|
||||
@@ -1,135 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "base/cprintf.hh"
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "mem/ruby/recorder/TraceRecord.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Construct a disabled tracer, take the warm-up length from the params
// (must be positive) and register this tracer with the ruby system.
Tracer::Tracer(const Params *p)
    : SimObject(p), m_enabled(false), m_warmup_length(p->warmup_length)
{
    assert(m_warmup_length > 0);
    p->ruby_system->registerTracer(this);
}
|
||||
|
||||
void
|
||||
Tracer::startTrace(string filename)
|
||||
{
|
||||
if (m_enabled)
|
||||
stopTrace();
|
||||
|
||||
if (filename != "") {
|
||||
m_trace_file.open(filename.c_str());
|
||||
if (m_trace_file.fail()) {
|
||||
cprintf("Error: error opening file '%s'\n", filename);
|
||||
cprintf("Trace not enabled.\n");
|
||||
return;
|
||||
}
|
||||
cprintf("Request trace enabled to output file '%s'\n", filename);
|
||||
m_enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Tracer::stopTrace()
|
||||
{
|
||||
if (m_enabled) {
|
||||
m_trace_file.close();
|
||||
cout << "Request trace file closed." << endl;
|
||||
m_enabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Tracer::traceRequest(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time)
|
||||
{
|
||||
assert(m_enabled);
|
||||
TraceRecord tr(sequencer, data_addr, pc_addr, type, time);
|
||||
tr.output(m_trace_file);
|
||||
}
|
||||
|
||||
int
|
||||
Tracer::playbackTrace(string filename)
|
||||
{
|
||||
igzstream in(filename.c_str());
|
||||
if (in.fail()) {
|
||||
cprintf("Error: error opening file '%s'\n", filename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
|
||||
TraceRecord record;
|
||||
int counter = 0;
|
||||
// Read in the next TraceRecord
|
||||
bool ok = record.input(in);
|
||||
while (ok) {
|
||||
// Put it in the right cache
|
||||
record.issueRequest();
|
||||
counter++;
|
||||
|
||||
// Read in the next TraceRecord
|
||||
ok = record.input(in);
|
||||
|
||||
// Clear the statistics after warmup
|
||||
if (counter == m_warmup_length) {
|
||||
cprintf("Clearing stats after warmup of length %s\n",
|
||||
m_warmup_length);
|
||||
g_system_ptr->clearStats();
|
||||
}
|
||||
}
|
||||
|
||||
// Flush the prefetches through the system
|
||||
// FIXME - should be smarter
|
||||
g_eventQueue_ptr->triggerEvents(g_eventQueue_ptr->getTime() + 1000);
|
||||
|
||||
time_t stop_time = time(NULL);
|
||||
double seconds = difftime(stop_time, start_time);
|
||||
double minutes = seconds / 60.0;
|
||||
cout << "playbackTrace: " << minutes << " minutes" << endl;
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
// Print hook used by operator<<; the body is empty, so nothing is written.
void
|
||||
Tracer::print(ostream& out) const
|
||||
{
|
||||
}
|
||||
|
||||
// Factory: build the Tracer described by this parameter object.
Tracer *
RubyTracerParams::create()
{
    Tracer *tracer = new Tracer(this);
    return tracer;
}
|
||||
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Controller class of the tracer. Can stop/start/playback the ruby
|
||||
* cache requests trace.
|
||||
*/
|
||||
|
||||
#ifndef __MEM_RUBY_RECORDER_TRACER_HH__
|
||||
#define __MEM_RUBY_RECORDER_TRACER_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "params/RubyTracer.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
#include "gzstream.hh"
|
||||
|
||||
class Address;
|
||||
class TraceRecord;
|
||||
class Sequencer;
|
||||
|
||||
// SimObject that controls the ruby request trace: it can start and stop
// writing a trace file and play a previously written trace back.
class Tracer : public SimObject
|
||||
{
|
||||
public:
|
||||
typedef RubyTracerParams Params;
|
||||
// Starts disabled; reads the warm-up length from the params.
Tracer(const Params *p);
|
||||
|
||||
// Open filename and enable tracing (stops a running trace first).
void startTrace(std::string filename);
|
||||
// Close the trace file and disable tracing.
void stopTrace();
|
||||
// True while a trace file is open for writing.
bool traceEnabled() { return m_enabled; }
|
||||
// Append one request to the trace; tracing must be enabled.
void traceRequest(Sequencer* sequencer, const Address& data_addr,
|
||||
const Address& pc_addr, RubyRequestType type, Time time);
|
||||
|
||||
// Print hook used by operator<<.
void print(std::ostream& out) const;
|
||||
|
||||
// Replay the records in filename; returns how many were issued.
int playbackTrace(std::string filename);
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
Tracer(const Tracer& obj);
|
||||
Tracer& operator=(const Tracer& obj);
|
||||
|
||||
// Output stream of the trace currently being written.
ogzstream m_trace_file;
|
||||
// Whether tracing is currently enabled.
bool m_enabled;
|
||||
|
||||
//added by SS
// Playback clears the statistics after this many replayed records.
|
||||
int m_warmup_length;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const Tracer& obj)
|
||||
{
|
||||
obj.print(out);
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_RECORDER_TRACER_HH__
|
||||
@@ -1,37 +0,0 @@
|
||||
# Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Steve Reinhardt
|
||||
# Brad Beckmann
|
||||
|
||||
from m5.params import *
|
||||
from m5.SimObject import SimObject
|
||||
|
||||
# Python-side SimObject description of the Ruby tracer. The cxx_class
# attribute names the C++ class 'Tracer' as its implementation.
class RubyTracer(SimObject):
|
||||
type = 'RubyTracer'
|
||||
cxx_class = 'Tracer'
|
||||
# Integer parameter, default 100000; no description string is given.
# NOTE(review): presumably the warm-up length used by Tracer -- confirm.
warmup_length = Param.Int(100000, "")
|
||||
# The RubySystem this tracer is attached to (no default, no description).
ruby_system = Param.RubySystem("")
|
||||
@@ -33,12 +33,11 @@
|
||||
#include <string>
|
||||
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/protocol/MachineType.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Consumer.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/network/Network.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "params/RubyController.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
@@ -68,6 +67,8 @@ class AbstractController : public SimObject, public Consumer
|
||||
virtual void wakeup() = 0;
|
||||
// virtual void dumpStats(std::ostream & out) = 0;
|
||||
virtual void clearStats() = 0;
|
||||
virtual void recordCacheTrace(int cntrl, CacheRecorder* tr) = 0;
|
||||
virtual Sequencer* getSequencer() const = 0;
|
||||
};
|
||||
|
||||
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -28,7 +28,9 @@
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "debug/RubyCache.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/system/CacheMemory.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -364,31 +366,42 @@ CacheMemory::profileGenericRequest(GenericRequestType requestType,
|
||||
}
|
||||
|
||||
void
|
||||
CacheMemory::recordCacheContents(CacheRecorder& tr) const
|
||||
CacheMemory::recordCacheContents(int cntrl, CacheRecorder* tr) const
|
||||
{
|
||||
uint64 warmedUpBlocks = 0;
|
||||
uint64 totalBlocks M5_VAR_USED = (uint64)m_cache_num_sets
|
||||
* (uint64)m_cache_assoc;
|
||||
|
||||
for (int i = 0; i < m_cache_num_sets; i++) {
|
||||
for (int j = 0; j < m_cache_assoc; j++) {
|
||||
AccessPermission perm = m_cache[i][j]->m_Permission;
|
||||
RubyRequestType request_type = RubyRequestType_NULL;
|
||||
if (perm == AccessPermission_Read_Only) {
|
||||
if (m_is_instruction_only_cache) {
|
||||
request_type = RubyRequestType_IFETCH;
|
||||
} else {
|
||||
request_type = RubyRequestType_LD;
|
||||
if (m_cache[i][j] != NULL) {
|
||||
AccessPermission perm = m_cache[i][j]->m_Permission;
|
||||
RubyRequestType request_type = RubyRequestType_NULL;
|
||||
if (perm == AccessPermission_Read_Only) {
|
||||
if (m_is_instruction_only_cache) {
|
||||
request_type = RubyRequestType_IFETCH;
|
||||
} else {
|
||||
request_type = RubyRequestType_LD;
|
||||
}
|
||||
} else if (perm == AccessPermission_Read_Write) {
|
||||
request_type = RubyRequestType_ST;
|
||||
}
|
||||
} else if (perm == AccessPermission_Read_Write) {
|
||||
request_type = RubyRequestType_ST;
|
||||
}
|
||||
|
||||
if (request_type != RubyRequestType_NULL) {
|
||||
#if 0
|
||||
tr.addRecord(m_chip_ptr->getID(), m_cache[i][j].m_Address,
|
||||
Address(0), request_type,
|
||||
m_replacementPolicy_ptr->getLastAccess(i, j));
|
||||
#endif
|
||||
if (request_type != RubyRequestType_NULL) {
|
||||
tr->addRecord(cntrl, m_cache[i][j]->m_Address.getAddress(),
|
||||
0, request_type,
|
||||
m_replacementPolicy_ptr->getLastAccess(i, j),
|
||||
m_cache[i][j]->getDataBlk());
|
||||
warmedUpBlocks++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DPRINTF(RubyCache, "%s: %lli blocks of %lli total blocks"
|
||||
"recorded %.2f%% \n", name().c_str(), warmedUpBlocks,
|
||||
(uint64)m_cache_num_sets * (uint64)m_cache_assoc,
|
||||
(float(warmedUpBlocks)/float(totalBlocks))*100.0);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -34,21 +34,15 @@
|
||||
#include <vector>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/protocol/GenericRequestType.hh"
|
||||
#include "mem/protocol/RubyRequest.hh"
|
||||
#include "mem/protocol/RubyRequestType.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/profiler/CacheProfiler.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
|
||||
#include "mem/ruby/system/LRUPolicy.hh"
|
||||
#include "mem/ruby/system/PseudoLRUPolicy.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "params/RubyCache.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
@@ -100,12 +94,7 @@ class CacheMemory : public SimObject
|
||||
int getLatency() const { return m_latency; }
|
||||
|
||||
// Hook for checkpointing the contents of the cache
|
||||
void recordCacheContents(CacheRecorder& tr) const;
|
||||
void
|
||||
setAsInstructionCache(bool is_icache)
|
||||
{
|
||||
m_is_instruction_only_cache = is_icache;
|
||||
}
|
||||
void recordCacheContents(int cntrl, CacheRecorder* tr) const;
|
||||
|
||||
// Set this address to most recently used
|
||||
void setMRU(const Address& address);
|
||||
@@ -146,7 +135,6 @@ class CacheMemory : public SimObject
|
||||
|
||||
// Data Members (m_prefix)
|
||||
bool m_is_instruction_only_cache;
|
||||
bool m_is_data_only_cache;
|
||||
|
||||
// The first index is the # of cache lines.
|
||||
// The second index is the amount of associativity.
|
||||
|
||||
@@ -55,6 +55,9 @@ class DMASequencer : public RubyPort
|
||||
/* external interface */
|
||||
RequestStatus makeRequest(PacketPtr pkt);
|
||||
bool busy() { return m_is_busy;}
|
||||
int outstandingCount() const { return (m_is_busy ? 1 : 0); }
|
||||
bool isDeadlockEventScheduled() const { return false; }
|
||||
void descheduleDeadlockEvent() {}
|
||||
|
||||
/* SLICC callback */
|
||||
void dataCallback(const DataBlock & dblk);
|
||||
|
||||
@@ -58,6 +58,7 @@ DirectoryMemory::init()
|
||||
|
||||
if (m_use_map) {
|
||||
m_sparseMemory = new SparseMemory(m_map_levels);
|
||||
g_system_ptr->registerSparseMemory(m_sparseMemory);
|
||||
} else {
|
||||
m_entries = new AbstractEntry*[m_num_entries];
|
||||
for (int i = 0; i < m_num_entries; i++)
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#ifndef __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
#define __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
|
||||
#include "base/trace.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
|
||||
class DirectoryMemory;
|
||||
@@ -48,6 +49,8 @@ class MemoryVector
|
||||
|
||||
void write(const Address & paddr, uint8* data, int len);
|
||||
uint8* read(const Address & paddr, uint8* data, int len);
|
||||
uint32 collatePages(uint8* &raw_data);
|
||||
void populatePages(uint8* raw_data);
|
||||
|
||||
private:
|
||||
uint8* getBlockPtr(const PhysAddress & addr);
|
||||
@@ -56,6 +59,7 @@ class MemoryVector
|
||||
uint8** m_pages;
|
||||
uint32 m_num_pages;
|
||||
const uint32 m_page_offset_mask;
|
||||
static const uint32 PAGE_SIZE = 4096;
|
||||
};
|
||||
|
||||
inline
|
||||
@@ -97,7 +101,7 @@ MemoryVector::resize(uint32 size)
|
||||
delete [] m_pages;
|
||||
}
|
||||
m_size = size;
|
||||
assert(size%4096 == 0);
|
||||
assert(size%PAGE_SIZE == 0);
|
||||
m_num_pages = size >> 12;
|
||||
m_pages = new uint8*[m_num_pages];
|
||||
memset(m_pages, 0, m_num_pages * sizeof(uint8*));
|
||||
@@ -118,8 +122,8 @@ MemoryVector::write(const Address & paddr, uint8* data, int len)
|
||||
}
|
||||
if (all_zeros)
|
||||
return;
|
||||
m_pages[page_num] = new uint8[4096];
|
||||
memset(m_pages[page_num], 0, 4096);
|
||||
m_pages[page_num] = new uint8[PAGE_SIZE];
|
||||
memset(m_pages[page_num], 0, PAGE_SIZE);
|
||||
uint32 offset = paddr.getAddress() & m_page_offset_mask;
|
||||
memcpy(&m_pages[page_num][offset], data, len);
|
||||
} else {
|
||||
@@ -147,10 +151,82 @@ MemoryVector::getBlockPtr(const PhysAddress & paddr)
|
||||
{
|
||||
uint32 page_num = paddr.getAddress() >> 12;
|
||||
if (m_pages[page_num] == 0) {
|
||||
m_pages[page_num] = new uint8[4096];
|
||||
memset(m_pages[page_num], 0, 4096);
|
||||
m_pages[page_num] = new uint8[PAGE_SIZE];
|
||||
memset(m_pages[page_num], 0, PAGE_SIZE);
|
||||
}
|
||||
return &m_pages[page_num][paddr.getAddress()&m_page_offset_mask];
|
||||
}
|
||||
|
||||
/*!
|
||||
* Function for collating all the pages of the physical memory together.
|
||||
* In case a pointer for a page is NULL, this page needs only a single byte
|
||||
* to represent that the pointer is NULL. Otherwise, it needs 1 + PAGE_SIZE
|
||||
* bytes. The first byte indicates that the page pointer is not NULL, and the rest of
|
||||
* the bytes represent the data on the page.
|
||||
*/
|
||||
|
||||
// Serialises all pages into a freshly allocated buffer returned through
// 'raw_data'. Layout: the page count (uint32), then for every page one flag
// byte (0 = page pointer is NULL, 1 = page present) followed, for present
// pages only, by PAGE_SIZE bytes of page data. Returns the number of bytes
// written.
inline uint32
MemoryVector::collatePages(uint8* &raw_data)
{
    // First pass: find out how many pages actually carry data.
    uint32 num_absent_pages = 0;
    for (uint32 page = 0; page < m_num_pages; ++page) {
        if (m_pages[page] == 0) {
            ++num_absent_pages;
        }
    }

    raw_data = new uint8[sizeof(uint32)  /* number of pages */
                         + m_num_pages   /* one flag byte per page */
                         + PAGE_SIZE * (m_num_pages - num_absent_pages)];

    // Header: the number of pages being stored.
    memcpy(raw_data, &m_num_pages, sizeof(uint32));
    uint32 offset = sizeof(uint32);

    // Second pass: flag byte, then the payload for pages that exist.
    for (uint32 page = 0; page < m_num_pages; ++page) {
        if (m_pages[page] != 0) {
            raw_data[offset] = 1;
            offset += 1;
            memcpy(raw_data + offset, m_pages[page], PAGE_SIZE);
            offset += PAGE_SIZE;
        } else {
            raw_data[offset] = 0;
            offset += 1;
        }
    }

    return offset;
}
|
||||
|
||||
/*!
|
||||
* Function for populating the pages of the memory using the available raw
|
||||
* data. Each page has a byte associated with it, which represents whether the
|
||||
* page was NULL or not, when all the pages were collated. The function assumes
|
||||
* that the number of pages in the memory is the same as the number recorded
|
||||
* in the checkpoint.
|
||||
*/
|
||||
inline void
|
||||
MemoryVector::populatePages(uint8* raw_data)
|
||||
{
|
||||
uint32 data_size = 0;
|
||||
uint32 num_pages = 0;
|
||||
|
||||
/* Read the number of pages that were stored. */
|
||||
memcpy(&num_pages, raw_data, sizeof(uint32));
|
||||
data_size = sizeof(uint32);
|
||||
assert(num_pages == m_num_pages);
|
||||
|
||||
for (uint32 i = 0;i < m_num_pages; ++i)
|
||||
{
|
||||
assert(m_pages[i] == 0);
|
||||
if (raw_data[data_size] != 0) {
|
||||
m_pages[i] = new uint8[PAGE_SIZE];
|
||||
memcpy(m_pages[i], raw_data + data_size + 1, PAGE_SIZE);
|
||||
data_size += PAGE_SIZE;
|
||||
}
|
||||
data_size += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_SYSTEM_MEMORYVECTOR_HH__
|
||||
|
||||
@@ -32,7 +32,6 @@
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
|
||||
template<class ENTRY>
|
||||
struct PerfectCacheLineState
|
||||
@@ -57,10 +56,6 @@ class PerfectCacheMemory
|
||||
|
||||
static void printConfig(std::ostream& out);
|
||||
|
||||
// perform a cache access and see if we hit or not. Return true
|
||||
// on a hit.
|
||||
bool tryCacheAccess(const CacheMsg& msg, bool& block_stc, ENTRY*& entry);
|
||||
|
||||
// tests to see if an address is present in the cache
|
||||
bool isTagPresent(const Address& address) const;
|
||||
|
||||
@@ -118,15 +113,6 @@ PerfectCacheMemory<ENTRY>::printConfig(std::ostream& out)
|
||||
{
|
||||
}
|
||||
|
||||
template<class ENTRY>
|
||||
inline bool
|
||||
PerfectCacheMemory<ENTRY>::tryCacheAccess(const CacheMsg& msg,
|
||||
bool& block_stc, ENTRY*& entry)
|
||||
{
|
||||
panic("not implemented");
|
||||
return true;
|
||||
}
|
||||
|
||||
// tests to see if an address is present in the cache
|
||||
template<class ENTRY>
|
||||
inline bool
|
||||
|
||||
@@ -27,11 +27,11 @@
|
||||
*/
|
||||
|
||||
#include "cpu/testers/rubytest/RubyTester.hh"
|
||||
#include "debug/Config.hh"
|
||||
#include "debug/Ruby.hh"
|
||||
#include "mem/protocol/AccessPermission.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "mem/physical.hh"
|
||||
|
||||
RubyPort::RubyPort(const Params *p)
|
||||
: MemObject(p)
|
||||
@@ -51,6 +51,8 @@ RubyPort::RubyPort(const Params *p)
|
||||
m_usingRubyTester = p->using_ruby_tester;
|
||||
access_phys_mem = p->access_phys_mem;
|
||||
|
||||
drainEvent = NULL;
|
||||
|
||||
ruby_system = p->ruby_system;
|
||||
waitingOnSequencer = false;
|
||||
}
|
||||
@@ -66,8 +68,10 @@ Port *
|
||||
RubyPort::getPort(const std::string &if_name, int idx)
|
||||
{
|
||||
if (if_name == "port") {
|
||||
return new M5Port(csprintf("%s-port%d", name(), idx), this,
|
||||
ruby_system, access_phys_mem);
|
||||
M5Port* cpuPort = new M5Port(csprintf("%s-port%d", name(), idx),
|
||||
this, ruby_system, access_phys_mem);
|
||||
cpu_ports.push_back(cpuPort);
|
||||
return cpuPort;
|
||||
}
|
||||
|
||||
if (if_name == "pio_port") {
|
||||
@@ -508,6 +512,82 @@ RubyPort::ruby_hit_callback(PacketPtr pkt)
|
||||
(*i)->sendRetry();
|
||||
}
|
||||
}
|
||||
|
||||
testDrainComplete();
|
||||
}
|
||||
|
||||
void
|
||||
RubyPort::testDrainComplete()
|
||||
{
|
||||
//If we weren't able to drain before, we might be able to now.
|
||||
if (drainEvent != NULL) {
|
||||
unsigned int drainCount = getDrainCount(drainEvent);
|
||||
DPRINTF(Config, "Drain count: %u\n", drainCount);
|
||||
if (drainCount == 0) {
|
||||
drainEvent->process();
|
||||
// Clear the drain event once we're done with it.
|
||||
drainEvent = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int
|
||||
RubyPort::getDrainCount(Event *de)
|
||||
{
|
||||
int count = 0;
|
||||
//
|
||||
// If the sequencer is not empty, then requests need to drain.
|
||||
// The outstandingCount is the number of requests outstanding and thus the
|
||||
// number of times M5's timing port will process the drain event.
|
||||
//
|
||||
count += outstandingCount();
|
||||
|
||||
DPRINTF(Config, "outstanding count %d\n", outstandingCount());
|
||||
|
||||
// To simplify the draining process, the sequencer's deadlock detection
|
||||
// event should have been descheduled.
|
||||
assert(isDeadlockEventScheduled() == false);
|
||||
|
||||
if (pio_port != NULL) {
|
||||
count += pio_port->drain(de);
|
||||
DPRINTF(Config, "count after pio check %d\n", count);
|
||||
}
|
||||
if (physMemPort != NULL) {
|
||||
count += physMemPort->drain(de);
|
||||
DPRINTF(Config, "count after physmem check %d\n", count);
|
||||
}
|
||||
|
||||
for (CpuPortIter p_iter = cpu_ports.begin(); p_iter != cpu_ports.end();
|
||||
p_iter++) {
|
||||
M5Port* cpu_port = *p_iter;
|
||||
count += cpu_port->drain(de);
|
||||
DPRINTF(Config, "count after cpu port check %d\n", count);
|
||||
}
|
||||
|
||||
DPRINTF(Config, "final count %d\n", count);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
RubyPort::drain(Event *de)
|
||||
{
|
||||
if (isDeadlockEventScheduled()) {
|
||||
descheduleDeadlockEvent();
|
||||
}
|
||||
|
||||
int count = getDrainCount(de);
|
||||
|
||||
// Set status
|
||||
if (count != 0) {
|
||||
drainEvent = de;
|
||||
|
||||
changeState(SimObject::Draining);
|
||||
return count;
|
||||
}
|
||||
|
||||
changeState(SimObject::Drained);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include <string>
|
||||
|
||||
#include "mem/protocol/RequestStatus.hh"
|
||||
#include "mem/ruby/slicc_interface/RubyRequest.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/physical.hh"
|
||||
@@ -115,17 +114,23 @@ class RubyPort : public MemObject
|
||||
Port *getPort(const std::string &if_name, int idx);
|
||||
|
||||
virtual RequestStatus makeRequest(PacketPtr pkt) = 0;
|
||||
virtual int outstandingCount() const = 0;
|
||||
virtual bool isDeadlockEventScheduled() const = 0;
|
||||
virtual void descheduleDeadlockEvent() = 0;
|
||||
|
||||
//
|
||||
// Called by the controller to give the sequencer a pointer.
|
||||
// A pointer to the controller is needed for atomic support.
|
||||
//
|
||||
void setController(AbstractController* _cntrl) { m_controller = _cntrl; }
|
||||
int getId() { return m_version; }
|
||||
unsigned int drain(Event *de);
|
||||
|
||||
protected:
|
||||
const std::string m_name;
|
||||
void ruby_hit_callback(PacketPtr pkt);
|
||||
void hit(PacketPtr pkt);
|
||||
void testDrainComplete();
|
||||
|
||||
int m_version;
|
||||
AbstractController* m_controller;
|
||||
@@ -143,11 +148,19 @@ class RubyPort : public MemObject
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int getDrainCount(Event *de);
|
||||
|
||||
uint16_t m_port_id;
|
||||
uint64_t m_request_cnt;
|
||||
|
||||
M5Port* physMemPort;
|
||||
|
||||
/*! Vector of CPU Port attached to this Ruby port. */
|
||||
typedef std::vector<M5Port*>::iterator CpuPortIter;
|
||||
std::vector<M5Port*> cpu_ports;
|
||||
|
||||
Event *drainEvent;
|
||||
|
||||
PhysicalMemory* physmem;
|
||||
RubySystem* ruby_system;
|
||||
|
||||
|
||||
@@ -40,9 +40,7 @@
|
||||
#include "mem/protocol/RubyAccessMode.hh"
|
||||
#include "mem/ruby/buffers/MessageBuffer.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/common/SubBlock.hh"
|
||||
#include "mem/ruby/profiler/Profiler.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/slicc_interface/RubyRequest.hh"
|
||||
#include "mem/ruby/system/CacheMemory.hh"
|
||||
#include "mem/ruby/system/Sequencer.hh"
|
||||
@@ -521,7 +519,11 @@ Sequencer::hitCallback(SequencerRequest* srequest,
|
||||
}
|
||||
|
||||
// update the data
|
||||
if (pkt->getPtr<uint8_t>(true) != NULL) {
|
||||
if (g_system_ptr->m_warmup_enabled) {
|
||||
assert(pkt->getPtr<uint8_t>(false) != NULL);
|
||||
data.setData(pkt->getPtr<uint8_t>(false),
|
||||
request_address.getOffset(), pkt->getSize());
|
||||
} else if (pkt->getPtr<uint8_t>(true) != NULL) {
|
||||
if ((type == RubyRequestType_LD) ||
|
||||
(type == RubyRequestType_IFETCH) ||
|
||||
(type == RubyRequestType_RMW_Read) ||
|
||||
@@ -553,8 +555,17 @@ Sequencer::hitCallback(SequencerRequest* srequest,
|
||||
testerSenderState->subBlock->mergeFrom(data);
|
||||
}
|
||||
|
||||
ruby_hit_callback(pkt);
|
||||
delete srequest;
|
||||
|
||||
if (g_system_ptr->m_warmup_enabled) {
|
||||
delete pkt;
|
||||
g_system_ptr->m_cache_recorder->enqueueNextFetchRequest();
|
||||
} else if (g_system_ptr->m_cooldown_enabled) {
|
||||
delete pkt;
|
||||
g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
|
||||
} else {
|
||||
ruby_hit_callback(pkt);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -39,8 +39,6 @@
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
|
||||
class DataBlock;
|
||||
class CacheMsg;
|
||||
class MachineID;
|
||||
class CacheMemory;
|
||||
|
||||
class RubySequencerParams;
|
||||
@@ -100,6 +98,18 @@ class Sequencer : public RubyPort, public Consumer
|
||||
|
||||
RequestStatus makeRequest(PacketPtr pkt);
|
||||
bool empty() const;
|
||||
int outstandingCount() const { return m_outstanding_count; }
|
||||
bool
|
||||
isDeadlockEventScheduled() const
|
||||
{
|
||||
return deadlockCheckEvent.scheduled();
|
||||
}
|
||||
|
||||
void
|
||||
descheduleDeadlockEvent()
|
||||
{
|
||||
deschedule(deadlockCheckEvent);
|
||||
}
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
void printStats(std::ostream& out) const;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,6 +27,8 @@
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "debug/RubyCache.hh"
|
||||
#include "mem/ruby/system/SparseMemory.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
@@ -82,19 +85,19 @@ SparseMemory::recursivelyRemoveTables(SparseMapType* curTable, int curLevel)
|
||||
SparseMapType::iterator iter;
|
||||
|
||||
for (iter = curTable->begin(); iter != curTable->end(); iter++) {
|
||||
SparseMemEntry* entryStruct = &((*iter).second);
|
||||
SparseMemEntry entry = (*iter).second;
|
||||
|
||||
if (curLevel != (m_number_of_levels - 1)) {
|
||||
// If not at the last level, analyze those lower level
|
||||
// tables first, then delete those next tables
|
||||
SparseMapType* nextTable = (SparseMapType*)(entryStruct->entry);
|
||||
SparseMapType* nextTable = (SparseMapType*)(entry);
|
||||
recursivelyRemoveTables(nextTable, (curLevel + 1));
|
||||
delete nextTable;
|
||||
} else {
|
||||
// If at the last level, delete the directory entry
|
||||
delete (AbstractEntry*)(entryStruct->entry);
|
||||
delete (AbstractEntry*)(entry);
|
||||
}
|
||||
entryStruct->entry = NULL;
|
||||
entry = NULL;
|
||||
}
|
||||
|
||||
// Once all entries have been deleted, erase the entries
|
||||
@@ -134,7 +137,7 @@ SparseMemory::exist(const Address& address) const
|
||||
// If the address is found, move on to the next level.
|
||||
// Otherwise, return not found
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
DPRINTF(RubyCache, "Not found\n");
|
||||
return false;
|
||||
@@ -156,7 +159,6 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
||||
|
||||
Address curAddress;
|
||||
SparseMapType* curTable = m_map_head;
|
||||
SparseMemEntry* entryStruct = NULL;
|
||||
|
||||
// Initialize the high bit to be the total number of bits plus
|
||||
// the block offset. However the highest bit index is one less
|
||||
@@ -179,7 +181,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
||||
// if the address exists in the cur table, move on. Otherwise
|
||||
// create a new table.
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
m_adds_per_level[level]++;
|
||||
|
||||
@@ -194,9 +196,7 @@ SparseMemory::add(const Address& address, AbstractEntry* entry)
|
||||
|
||||
// Create the pointer container SparseMemEntry and add it
|
||||
// to the table.
|
||||
entryStruct = new SparseMemEntry;
|
||||
entryStruct->entry = newEntry;
|
||||
(*curTable)[curAddress] = *entryStruct;
|
||||
(*curTable)[curAddress] = newEntry;
|
||||
|
||||
// Move to the next level of the hierarchy
|
||||
curTable = (SparseMapType*)newEntry;
|
||||
@@ -215,7 +215,7 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
||||
{
|
||||
Address curAddress;
|
||||
CurNextInfo nextInfo;
|
||||
SparseMemEntry* entryStruct;
|
||||
SparseMemEntry entry;
|
||||
|
||||
// create the appropriate address for this level
|
||||
// Note: that set Address is inclusive of the specified range,
|
||||
@@ -231,11 +231,11 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
||||
|
||||
assert(curInfo.curTable->count(curAddress) != 0);
|
||||
|
||||
entryStruct = &((*(curInfo.curTable))[curAddress]);
|
||||
entry = (*(curInfo.curTable))[curAddress];
|
||||
|
||||
if (curInfo.level < (m_number_of_levels - 1)) {
|
||||
// set up next level's info
|
||||
nextInfo.curTable = (SparseMapType*)(entryStruct->entry);
|
||||
nextInfo.curTable = (SparseMapType*)(entry);
|
||||
nextInfo.level = curInfo.level + 1;
|
||||
|
||||
nextInfo.highBit = curInfo.highBit -
|
||||
@@ -252,15 +252,15 @@ SparseMemory::recursivelyRemoveLevels(const Address& address,
|
||||
if (tableSize == 0) {
|
||||
m_removes_per_level[curInfo.level]++;
|
||||
delete nextInfo.curTable;
|
||||
entryStruct->entry = NULL;
|
||||
entry = NULL;
|
||||
curInfo.curTable->erase(curAddress);
|
||||
}
|
||||
} else {
|
||||
// if this is the last level, we have reached the Directory
|
||||
// Entry and thus we should delete it including the
|
||||
// SparseMemEntry container struct.
|
||||
delete (AbstractEntry*)(entryStruct->entry);
|
||||
entryStruct->entry = NULL;
|
||||
delete (AbstractEntry*)(entry);
|
||||
entry = NULL;
|
||||
curInfo.curTable->erase(curAddress);
|
||||
m_removes_per_level[curInfo.level]++;
|
||||
}
|
||||
@@ -331,7 +331,7 @@ SparseMemory::lookup(const Address& address)
|
||||
// If the address is found, move on to the next level.
|
||||
// Otherwise, return not found
|
||||
if (curTable->count(curAddress) != 0) {
|
||||
curTable = (SparseMapType*)(((*curTable)[curAddress]).entry);
|
||||
curTable = (SparseMapType*)((*curTable)[curAddress]);
|
||||
} else {
|
||||
DPRINTF(RubyCache, "Not found\n");
|
||||
return NULL;
|
||||
@@ -344,6 +344,70 @@ SparseMemory::lookup(const Address& address)
|
||||
return entry;
|
||||
}
|
||||
|
||||
void
|
||||
SparseMemory::recordBlocks(int cntrl_id, CacheRecorder* tr) const
|
||||
{
|
||||
queue<SparseMapType*> unexplored_nodes[2];
|
||||
queue<physical_address_t> address_of_nodes[2];
|
||||
|
||||
unexplored_nodes[0].push(m_map_head);
|
||||
address_of_nodes[0].push(0);
|
||||
|
||||
int parity_of_level = 0;
|
||||
physical_address_t address, temp_address;
|
||||
Address curAddress;
|
||||
|
||||
// Initiallize the high bit to be the total number of bits plus
|
||||
// the block offset. However the highest bit index is one less
|
||||
// than this value.
|
||||
int highBit = m_total_number_of_bits + RubySystem::getBlockSizeBits();
|
||||
int lowBit;
|
||||
|
||||
for (int cur_level = 0; cur_level < m_number_of_levels; cur_level++) {
|
||||
|
||||
// create the appropriate address for this level
|
||||
// Note: that set Address is inclusive of the specified range,
|
||||
// thus the high bit is one less than the total number of bits
|
||||
// used to create the address.
|
||||
lowBit = highBit - m_number_of_bits_per_level[cur_level];
|
||||
|
||||
while (!unexplored_nodes[parity_of_level].empty()) {
|
||||
|
||||
SparseMapType* node = unexplored_nodes[parity_of_level].front();
|
||||
unexplored_nodes[parity_of_level].pop();
|
||||
|
||||
address = address_of_nodes[parity_of_level].front();
|
||||
address_of_nodes[parity_of_level].pop();
|
||||
|
||||
SparseMapType::iterator iter;
|
||||
|
||||
for (iter = node->begin(); iter != node->end(); iter++) {
|
||||
SparseMemEntry entry = (*iter).second;
|
||||
curAddress = (*iter).first;
|
||||
|
||||
if (cur_level != (m_number_of_levels - 1)) {
|
||||
// If not at the last level, put this node in the queue
|
||||
unexplored_nodes[1 - parity_of_level].push(
|
||||
(SparseMapType*)(entry));
|
||||
address_of_nodes[1 - parity_of_level].push(address |
|
||||
(curAddress.getAddress() << lowBit));
|
||||
} else {
|
||||
// If at the last level, add a trace record
|
||||
temp_address = address | (curAddress.getAddress()
|
||||
<< lowBit);
|
||||
DataBlock block = ((AbstractEntry*)entry)->getDataBlk();
|
||||
tr->addRecord(cntrl_id, temp_address, 0, RubyRequestType_ST, 0,
|
||||
block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adjust the highBit value for the next level
|
||||
highBit -= m_number_of_bits_per_level[cur_level];
|
||||
parity_of_level = 1 - parity_of_level;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SparseMemory::print(ostream& out) const
|
||||
{
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,15 +33,11 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
|
||||
struct SparseMemEntry
|
||||
{
|
||||
void* entry;
|
||||
};
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractEntry.hh"
|
||||
|
||||
typedef void* SparseMemEntry;
|
||||
typedef m5::hash_map<Address, SparseMemEntry> SparseMapType;
|
||||
|
||||
struct CurNextInfo
|
||||
@@ -63,6 +60,14 @@ class SparseMemory
|
||||
void add(const Address& address, AbstractEntry*);
|
||||
void remove(const Address& address);
|
||||
|
||||
/*!
|
||||
* Function for recording the contents of memory. This function walks
|
||||
* through all the levels of the sparse memory in a breadth first
|
||||
* fashion. This might need more memory than a depth first approach.
|
||||
* But breadth first seems easier to me than a depth first approach.
|
||||
*/
|
||||
void recordBlocks(int cntrl_id, CacheRecorder *) const;
|
||||
|
||||
AbstractEntry* lookup(const Address& address);
|
||||
|
||||
// Print cache contents
|
||||
@@ -95,12 +100,4 @@ class SparseMemory
|
||||
uint64_t* m_removes_per_level;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
operator<<(std::ostream& out, const SparseMemEntry& obj)
|
||||
{
|
||||
out << "SparseMemEntry";
|
||||
out << std::flush;
|
||||
return out;
|
||||
}
|
||||
|
||||
#endif // __MEM_RUBY_SYSTEM_SPARSEMEMORY_HH__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2011 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,16 +26,19 @@
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "base/output.hh"
|
||||
#include "mem/ruby/buffers/MessageBuffer.hh"
|
||||
#include "debug/RubySystem.hh"
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/network/Network.hh"
|
||||
#include "mem/ruby/profiler/Profiler.hh"
|
||||
#include "mem/ruby/recorder/Tracer.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/MemoryVector.hh"
|
||||
#include "mem/ruby/system/System.hh"
|
||||
#include "sim/simulate.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -49,7 +52,6 @@ int RubySystem::m_memory_size_bits;
|
||||
|
||||
Network* RubySystem::m_network_ptr;
|
||||
Profiler* RubySystem::m_profiler_ptr;
|
||||
Tracer* RubySystem::m_tracer_ptr;
|
||||
MemoryVector* RubySystem::m_mem_vec_ptr;
|
||||
|
||||
RubySystem::RubySystem(const Params *p)
|
||||
@@ -88,6 +90,8 @@ RubySystem::RubySystem(const Params *p)
|
||||
//
|
||||
RubyExitCallback* rubyExitCB = new RubyExitCallback(p->stats_filename);
|
||||
registerExitCallback(rubyExitCB);
|
||||
m_warmup_enabled = false;
|
||||
m_cooldown_enabled = false;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -108,23 +112,22 @@ RubySystem::registerProfiler(Profiler* profiler_ptr)
|
||||
m_profiler_ptr = profiler_ptr;
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerTracer(Tracer* tracer_ptr)
|
||||
{
|
||||
m_tracer_ptr = tracer_ptr;
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerAbstractController(AbstractController* cntrl)
|
||||
{
|
||||
m_abs_cntrl_vec.push_back(cntrl);
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::registerSparseMemory(SparseMemory* s)
|
||||
{
|
||||
m_sparse_memory_vector.push_back(s);
|
||||
}
|
||||
|
||||
RubySystem::~RubySystem()
|
||||
{
|
||||
delete m_network_ptr;
|
||||
delete m_profiler_ptr;
|
||||
delete m_tracer_ptr;
|
||||
if (m_mem_vec_ptr)
|
||||
delete m_mem_vec_ptr;
|
||||
}
|
||||
@@ -166,10 +169,144 @@ RubySystem::printStats(ostream& out)
|
||||
m_network_ptr->printStats(out);
|
||||
}
|
||||
|
||||
/**
 * Compress a raw trace buffer with zlib and store it in the checkpoint
 * directory.
 *
 * This function takes ownership of raw_data and frees it before
 * returning. Every I/O failure is reported through fatal().
 *
 * @param raw_data trace bytes to write; must have been allocated with
 *                 new[] (as serialize() does for the cache trace)
 * @param filename name of the file to create under Checkpoint::dir()
 * @param uncompressed_trace_size number of bytes of raw_data to write
 */
void
RubySystem::writeCompressedTrace(uint8* raw_data, string filename,
                                 uint64 uncompressed_trace_size)
{
    // Create the checkpoint file for the memory
    string thefile = Checkpoint::dir() + "/" + filename.c_str();

    int fd = creat(thefile.c_str(), 0664);
    if (fd < 0) {
        perror("creat");
        fatal("Can't open memory trace file '%s'\n", filename);
    }

    gzFile compressedMemory = gzdopen(fd, "wb");
    if (compressedMemory == NULL)
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);

    // gzwrite() takes an unsigned length and returns an int, so a 64-bit
    // trace size cannot be handed over in one call without risking
    // truncation. Feed the buffer in chunks that fit in both types.
    const uint64 max_chunk = 1 << 30;
    uint64 written = 0;
    while (written < uncompressed_trace_size) {
        uint64 remaining = uncompressed_trace_size - written;
        unsigned chunk =
            (unsigned)(remaining < max_chunk ? remaining : max_chunk);

        if (gzwrite(compressedMemory, raw_data + written, chunk) !=
            (int)chunk) {
            fatal("Write failed on memory trace file '%s'\n", filename);
        }
        written += chunk;
    }

    if (gzclose(compressedMemory)) {
        fatal("Close failed on memory trace file '%s'\n", filename);
    }

    // The buffer is an array allocation, so it needs the array form of
    // delete.
    delete [] raw_data;
}
|
||||
|
||||
void
|
||||
RubySystem::serialize(std::ostream &os)
|
||||
{
|
||||
m_cooldown_enabled = true;
|
||||
|
||||
vector<Sequencer*> sequencer_map;
|
||||
Sequencer* sequencer_ptr = NULL;
|
||||
int cntrl_id = -1;
|
||||
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
|
||||
if (sequencer_ptr == NULL) {
|
||||
sequencer_ptr = sequencer_map[cntrl];
|
||||
cntrl_id = cntrl;
|
||||
}
|
||||
}
|
||||
|
||||
assert(sequencer_ptr != NULL);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
if (sequencer_map[cntrl] == NULL) {
|
||||
sequencer_map[cntrl] = sequencer_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Create the CacheRecorder and record the cache trace
|
||||
m_cache_recorder = new CacheRecorder(NULL, 0, sequencer_map);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
m_abs_cntrl_vec[cntrl]->recordCacheTrace(cntrl, m_cache_recorder);
|
||||
}
|
||||
|
||||
// save the current tick value
|
||||
Tick curtick_original = curTick();
|
||||
// save the event queue head
|
||||
Event* eventq_head = eventq->replaceHead(NULL);
|
||||
|
||||
// Schedule an event to start cache cooldown
|
||||
RubyEvent* e = new RubyEvent(this);
|
||||
schedule(e,curTick());
|
||||
simulate();
|
||||
|
||||
// Restore eventq head
|
||||
eventq_head = eventq->replaceHead(eventq_head);
|
||||
// Restore curTick
|
||||
curTick(curtick_original);
|
||||
|
||||
uint8* raw_data = NULL;
|
||||
|
||||
if (m_mem_vec_ptr != NULL) {
|
||||
uint64 memory_trace_size = m_mem_vec_ptr->collatePages(raw_data);
|
||||
|
||||
string memory_trace_file = name() + ".memory.gz";
|
||||
writeCompressedTrace(raw_data, memory_trace_file,
|
||||
memory_trace_size);
|
||||
|
||||
SERIALIZE_SCALAR(memory_trace_file);
|
||||
SERIALIZE_SCALAR(memory_trace_size);
|
||||
|
||||
} else {
|
||||
for (int i = 0; i < m_sparse_memory_vector.size(); ++i) {
|
||||
m_sparse_memory_vector[i]->recordBlocks(cntrl_id,
|
||||
m_cache_recorder);
|
||||
}
|
||||
}
|
||||
|
||||
// Aggergate the trace entries together into a single array
|
||||
raw_data = new uint8_t[4096];
|
||||
uint64 cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
|
||||
4096);
|
||||
string cache_trace_file = name() + ".cache.gz";
|
||||
writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
|
||||
|
||||
SERIALIZE_SCALAR(cache_trace_file);
|
||||
SERIALIZE_SCALAR(cache_trace_size);
|
||||
|
||||
m_cooldown_enabled = false;
|
||||
}
|
||||
|
||||
/**
 * Read a zlib-compressed trace file into a newly allocated buffer.
 *
 * @param filename path of the compressed trace file
 * @param raw_data set to a new[]-allocated buffer of
 *                 uncompressed_trace_size bytes holding the trace; the
 *                 caller owns it
 * @param uncompressed_trace_size expected number of uncompressed bytes;
 *                 reading fewer than this is fatal
 */
void
RubySystem::readCompressedTrace(string filename, uint8*& raw_data,
                                uint64& uncompressed_trace_size)
{
    // Read the trace file
    gzFile compressedTrace;

    // trace file
    int fd = open(filename.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        fatal("Unable to open trace file %s", filename);
    }

    compressedTrace = gzdopen(fd, "rb");
    if (compressedTrace == NULL) {
        fatal("Insufficient memory to allocate compression state for %s\n",
              filename);
    }

    raw_data = new uint8_t[uncompressed_trace_size];

    // gzread() returns an int (-1 on error) and takes an unsigned length.
    // Comparing that int directly against the 64-bit size would convert
    // -1 to a huge unsigned value and hide the error, so check the signed
    // result and read in chunks that fit both types.
    const uint64 max_chunk = 1 << 30;
    uint64 total_read = 0;
    while (total_read < uncompressed_trace_size) {
        uint64 remaining = uncompressed_trace_size - total_read;
        unsigned chunk =
            (unsigned)(remaining < max_chunk ? remaining : max_chunk);

        int got = gzread(compressedTrace, raw_data + total_read, chunk);
        if (got <= 0) {
            // Error, or end of file before the whole trace was read.
            fatal("Unable to read complete trace from file %s\n", filename);
        }
        total_read += got;
    }

    if (gzclose(compressedTrace)) {
        fatal("Failed to close cache trace file '%s'\n", filename);
    }
}
|
||||
|
||||
void
|
||||
@@ -181,6 +318,88 @@ RubySystem::unserialize(Checkpoint *cp, const string §ion)
|
||||
// value of curTick()
|
||||
//
|
||||
clearStats();
|
||||
uint8* uncompressed_trace = NULL;
|
||||
|
||||
if (m_mem_vec_ptr != NULL) {
|
||||
string memory_trace_file;
|
||||
uint64 memory_trace_size = 0;
|
||||
|
||||
UNSERIALIZE_SCALAR(memory_trace_file);
|
||||
UNSERIALIZE_SCALAR(memory_trace_size);
|
||||
memory_trace_file = cp->cptDir + "/" + memory_trace_file;
|
||||
|
||||
readCompressedTrace(memory_trace_file, uncompressed_trace,
|
||||
memory_trace_size);
|
||||
m_mem_vec_ptr->populatePages(uncompressed_trace);
|
||||
|
||||
delete uncompressed_trace;
|
||||
uncompressed_trace = NULL;
|
||||
}
|
||||
|
||||
string cache_trace_file;
|
||||
uint64 cache_trace_size = 0;
|
||||
|
||||
UNSERIALIZE_SCALAR(cache_trace_file);
|
||||
UNSERIALIZE_SCALAR(cache_trace_size);
|
||||
cache_trace_file = cp->cptDir + "/" + cache_trace_file;
|
||||
|
||||
readCompressedTrace(cache_trace_file, uncompressed_trace,
|
||||
cache_trace_size);
|
||||
m_warmup_enabled = true;
|
||||
|
||||
vector<Sequencer*> sequencer_map;
|
||||
Sequencer* t = NULL;
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
sequencer_map.push_back(m_abs_cntrl_vec[cntrl]->getSequencer());
|
||||
if(t == NULL) t = sequencer_map[cntrl];
|
||||
}
|
||||
|
||||
assert(t != NULL);
|
||||
|
||||
for (int cntrl = 0; cntrl < m_abs_cntrl_vec.size(); cntrl++) {
|
||||
if (sequencer_map[cntrl] == NULL) {
|
||||
sequencer_map[cntrl] = t;
|
||||
}
|
||||
}
|
||||
|
||||
m_cache_recorder = new CacheRecorder(uncompressed_trace, cache_trace_size,
|
||||
sequencer_map);
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::startup()
|
||||
{
|
||||
if (m_warmup_enabled) {
|
||||
// save the current tick value
|
||||
Tick curtick_original = curTick();
|
||||
// save the event queue head
|
||||
Event* eventq_head = eventq->replaceHead(NULL);
|
||||
// set curTick to 0
|
||||
curTick(0);
|
||||
|
||||
// Schedule an event to start cache warmup
|
||||
RubyEvent* e = new RubyEvent(this);
|
||||
schedule(e,curTick());
|
||||
simulate();
|
||||
|
||||
delete m_cache_recorder;
|
||||
m_cache_recorder = NULL;
|
||||
m_warmup_enabled = false;
|
||||
// Restore eventq head
|
||||
eventq_head = eventq->replaceHead(eventq_head);
|
||||
// Restore curTick
|
||||
curTick(curtick_original);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::RubyEvent::process()
|
||||
{
|
||||
if (ruby_system->m_warmup_enabled) {
|
||||
ruby_system->m_cache_recorder->enqueueNextFetchRequest();
|
||||
} else if (ruby_system->m_cooldown_enabled) {
|
||||
ruby_system->m_cache_recorder->enqueueNextFlushRequest();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -190,11 +409,6 @@ RubySystem::clearStats() const
|
||||
m_network_ptr->clearStats();
|
||||
}
|
||||
|
||||
void
|
||||
RubySystem::recordCacheContents(CacheRecorder& tr) const
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CHECK_COHERENCE
|
||||
// This code will check for cases if the given cache block is exclusive in
|
||||
// one node and shared in another-- a coherence violation
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
|
||||
* Copyright (c) 1999-2012 Mark D. Hill and David A. Wood
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,21 +38,34 @@
|
||||
#include "base/callback.hh"
|
||||
#include "mem/ruby/common/Global.hh"
|
||||
#include "mem/ruby/eventqueue/RubyEventQueue.hh"
|
||||
#include "mem/ruby/system/RubyPort.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/slicc_interface/AbstractController.hh"
|
||||
#include "mem/ruby/system/MemoryVector.hh"
|
||||
#include "mem/ruby/system/SparseMemory.hh"
|
||||
#include "params/RubySystem.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
class AbstractController;
|
||||
class CacheRecorder;
|
||||
class MemoryVector;
|
||||
class Network;
|
||||
class Profiler;
|
||||
class Tracer;
|
||||
|
||||
class RubySystem : public SimObject
|
||||
{
|
||||
public:
|
||||
class RubyEvent : public Event
|
||||
{
|
||||
public:
|
||||
RubyEvent(RubySystem* _ruby_system)
|
||||
{
|
||||
ruby_system = _ruby_system;
|
||||
}
|
||||
private:
|
||||
void process();
|
||||
|
||||
RubySystem* ruby_system;
|
||||
};
|
||||
|
||||
friend class RubyEvent;
|
||||
|
||||
typedef RubySystemParams Params;
|
||||
RubySystem(const Params *p);
|
||||
~RubySystem();
|
||||
@@ -86,13 +99,6 @@ class RubySystem : public SimObject
|
||||
return m_profiler_ptr;
|
||||
}
|
||||
|
||||
static Tracer*
|
||||
getTracer()
|
||||
{
|
||||
assert(m_tracer_ptr != NULL);
|
||||
return m_tracer_ptr;
|
||||
}
|
||||
|
||||
static MemoryVector*
|
||||
getMemoryVector()
|
||||
{
|
||||
@@ -100,7 +106,6 @@ class RubySystem : public SimObject
|
||||
return m_mem_vec_ptr;
|
||||
}
|
||||
|
||||
void recordCacheContents(CacheRecorder& tr) const;
|
||||
static void printConfig(std::ostream& out);
|
||||
static void printStats(std::ostream& out);
|
||||
void clearStats() const;
|
||||
@@ -114,13 +119,15 @@ class RubySystem : public SimObject
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
virtual void serialize(std::ostream &os);
|
||||
virtual void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
void serialize(std::ostream &os);
|
||||
void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
void process();
|
||||
void startup();
|
||||
|
||||
void registerNetwork(Network*);
|
||||
void registerProfiler(Profiler*);
|
||||
void registerTracer(Tracer*);
|
||||
void registerAbstractController(AbstractController*);
|
||||
void registerSparseMemory(SparseMemory*);
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
@@ -130,6 +137,11 @@ class RubySystem : public SimObject
|
||||
void init();
|
||||
|
||||
static void printSystemConfig(std::ostream& out);
|
||||
void readCompressedTrace(std::string filename,
|
||||
uint8*& raw_data,
|
||||
uint64& uncompressed_trace_size);
|
||||
void writeCompressedTrace(uint8* raw_data, std::string file,
|
||||
uint64 uncompressed_trace_size);
|
||||
|
||||
private:
|
||||
// configuration parameters
|
||||
@@ -140,14 +152,16 @@ class RubySystem : public SimObject
|
||||
static int m_block_size_bits;
|
||||
static uint64 m_memory_size_bytes;
|
||||
static int m_memory_size_bits;
|
||||
|
||||
static Network* m_network_ptr;
|
||||
|
||||
public:
|
||||
static Profiler* m_profiler_ptr;
|
||||
static Tracer* m_tracer_ptr;
|
||||
static MemoryVector* m_mem_vec_ptr;
|
||||
std::vector<AbstractController*> m_abs_cntrl_vec;
|
||||
bool m_warmup_enabled;
|
||||
bool m_cooldown_enabled;
|
||||
CacheRecorder* m_cache_recorder;
|
||||
std::vector<SparseMemory*> m_sparse_memory_vector;
|
||||
};
|
||||
|
||||
inline std::ostream&
|
||||
|
||||
@@ -264,6 +264,8 @@ public:
|
||||
void clearStats();
|
||||
void blockOnQueue(Address addr, MessageBuffer* port);
|
||||
void unblock(Address addr);
|
||||
void recordCacheTrace(int cntrl, CacheRecorder* tr);
|
||||
Sequencer* getSequencer() const;
|
||||
|
||||
private:
|
||||
''')
|
||||
@@ -674,6 +676,12 @@ $vid->setDescription("[Version " + to_string(m_version) + ", ${ident}, name=${{v
|
||||
else:
|
||||
mq_ident = "NULL"
|
||||
|
||||
seq_ident = "NULL"
|
||||
for param in self.config_parameters:
|
||||
if param.name == "sequencer":
|
||||
assert(param.pointer)
|
||||
seq_ident = "m_%s_ptr" % param.name
|
||||
|
||||
code('''
|
||||
int
|
||||
$c_ident::getNumControllers()
|
||||
@@ -687,6 +695,12 @@ $c_ident::getMandatoryQueue() const
|
||||
return $mq_ident;
|
||||
}
|
||||
|
||||
Sequencer*
|
||||
$c_ident::getSequencer() const
|
||||
{
|
||||
return $seq_ident;
|
||||
}
|
||||
|
||||
const int &
|
||||
$c_ident::getVersion() const
|
||||
{
|
||||
@@ -875,6 +889,23 @@ $c_ident::unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr)
|
||||
|
||||
code('''
|
||||
|
||||
void
|
||||
$c_ident::recordCacheTrace(int cntrl, CacheRecorder* tr)
|
||||
{
|
||||
''')
|
||||
#
|
||||
# Record cache contents for all associated caches.
|
||||
#
|
||||
code.indent()
|
||||
for param in self.config_parameters:
|
||||
if param.type_ast.type.ident == "CacheMemory":
|
||||
assert(param.pointer)
|
||||
code('m_${{param.ident}}_ptr->recordCacheContents(cntrl, tr);')
|
||||
|
||||
code.dedent()
|
||||
code('''
|
||||
}
|
||||
|
||||
// Actions
|
||||
''')
|
||||
if self.TBEType != None and self.EntryType != None:
|
||||
|
||||
@@ -874,29 +874,62 @@ class SimObject(object):
|
||||
if hasattr(self, 'type'):
|
||||
print >>ini_file, 'type=%s' % self.type
|
||||
|
||||
child_names = self._children.keys()
|
||||
child_names.sort()
|
||||
if len(child_names):
|
||||
if len(self._children.keys()):
|
||||
print >>ini_file, 'children=%s' % \
|
||||
' '.join(self._children[n].get_name() for n in child_names)
|
||||
' '.join(self._children[n].get_name() \
|
||||
for n in sorted(self._children.keys()))
|
||||
|
||||
param_names = self._params.keys()
|
||||
param_names.sort()
|
||||
for param in param_names:
|
||||
for param in sorted(self._params.keys()):
|
||||
value = self._values.get(param)
|
||||
if value != None:
|
||||
print >>ini_file, '%s=%s' % (param,
|
||||
self._values[param].ini_str())
|
||||
|
||||
port_names = self._ports.keys()
|
||||
port_names.sort()
|
||||
for port_name in port_names:
|
||||
for port_name in sorted(self._ports.keys()):
|
||||
port = self._port_refs.get(port_name, None)
|
||||
if port != None:
|
||||
print >>ini_file, '%s=%s' % (port_name, port.ini_str())
|
||||
|
||||
print >>ini_file # blank line between objects
|
||||
|
||||
# Generate a tree of dictionaries expressing all the parameters in the
# instantiated system, for use by scripts that want to do power, thermal
# visualization, and other similar tasks.
def get_config_as_dict(self):
    """Return this object's configuration as a nested attrdict.

    The result holds 'type' and 'cxx_class' (when the object has them),
    one entry per parameter that has a value, one entry per child (keyed
    by the child's name, holding the child's own config dict), and one
    entry per connected port (holding the port's ini string).
    """
    d = attrdict()
    if hasattr(self, 'type'):
        d.type = self.type
    if hasattr(self, 'cxx_class'):
        d.cxx_class = self.cxx_class

    # Use native types for those supported by JSON and strings for
    # everything else. skipkeys=True seems to not work as well as one
    # would hope. Note type(None), not None: type(x) is never None, so a
    # bare None in this list could not match anything.
    json_native = (str, unicode, int, long, float, bool, type(None))

    for param in sorted(self._params.keys()):
        value = self._values.get(param)
        if value is None:
            # The parameter was never given a value. Skip it rather than
            # indexing self._values and raising KeyError.
            continue
        try:
            if type(value.value) in json_native:
                d[param] = value.value
            else:
                d[param] = str(value)
        except AttributeError:
            # Values that do not expose a plain .value are left out.
            pass

    for n in sorted(self._children.keys()):
        child = self._children[n]
        d[child.get_name()] = child.get_config_as_dict()

    for port_name in sorted(self._ports.keys()):
        port = self._port_refs.get(port_name, None)
        if port != None:
            # Might want to actually make this reference the object
            # in the future, although execing the string problem would
            # get some of the way there
            d[port_name] = port.ini_str()

    return d
|
||||
|
||||
def getCCParams(self):
|
||||
if self._ccParams:
|
||||
return self._ccParams
|
||||
|
||||
@@ -87,6 +87,8 @@ def parse_options():
|
||||
group("Configuration Options")
|
||||
option("--dump-config", metavar="FILE", default="config.ini",
|
||||
help="Dump configuration output file [Default: %default]")
|
||||
option("--json-config", metavar="FILE", default="config.json",
|
||||
help="Create JSON output of the configuration [Default: %default]")
|
||||
|
||||
# Debugging options
|
||||
group("Debugging Options")
|
||||
@@ -121,7 +123,6 @@ def parse_options():
|
||||
execfile(options_file, scope)
|
||||
|
||||
arguments = options.parse_args()
|
||||
|
||||
return options,arguments
|
||||
|
||||
def interact(scope):
|
||||
|
||||
@@ -228,6 +228,12 @@ class SimObjectVector(VectorParamValue):
|
||||
for obj in v.descendants():
|
||||
yield obj
|
||||
|
||||
def get_config_as_dict(self):
    """Return a list holding the config dict of each element, in order."""
    return [element.get_config_as_dict() for element in self]
|
||||
|
||||
class VectorParamDesc(ParamDesc):
|
||||
# Convert assigned value to appropriate type. If the RHS is not a
|
||||
# list or tuple, it generates a single-element list.
|
||||
@@ -256,6 +262,9 @@ class VectorParamDesc(ParamDesc):
|
||||
self.ptype.cxx_predecls(code)
|
||||
code('%}')
|
||||
code()
|
||||
# Make sure the SWIGPY_SLICE_ARG is defined through this inclusion
|
||||
code('%include "std_container.i"')
|
||||
code()
|
||||
self.ptype.swig_predecls(code)
|
||||
code()
|
||||
code('%include "std_vector.i"')
|
||||
@@ -961,6 +970,9 @@ class Time(ParamValue):
|
||||
def ini_str(self):
    """Return the string form of this value for the .ini config dump."""
    text = str(self)
    return text
|
||||
|
||||
def get_config_as_dict(self):
    """Return the string form of this value for the config dictionary."""
    text = str(self)
    return text
|
||||
|
||||
# Enumerated types are a little more complex. The user specifies the
|
||||
# type as Enum(foo) where foo is either a list or dictionary of
|
||||
# alternatives (typically strings, but not necessarily so). (In the
|
||||
|
||||
@@ -40,6 +40,7 @@ import SimObject
|
||||
import ticks
|
||||
import objects
|
||||
from util import fatal
|
||||
from util import attrdict
|
||||
|
||||
# define a MaxTick parameter
|
||||
MaxTick = 2**63 - 1
|
||||
@@ -71,6 +72,17 @@ def instantiate(ckpt_dir=None):
|
||||
obj.print_ini(ini_file)
|
||||
ini_file.close()
|
||||
|
||||
if options.json_config:
|
||||
try:
|
||||
import json
|
||||
json_file = file(os.path.join(options.outdir, options.json_config), 'w')
|
||||
d = root.get_config_as_dict()
|
||||
json.dump(d, json_file, indent=4)
|
||||
json_file.close()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Initialize the global statistics
|
||||
stats.initSimStats()
|
||||
|
||||
|
||||
@@ -54,8 +54,8 @@ class System(SimObject):
|
||||
physmem = Param.PhysicalMemory("Physical Memory")
|
||||
mem_mode = Param.MemoryMode('atomic', "The mode the memory system is in")
|
||||
memories = VectorParam.PhysicalMemory(Self.all, "All memories is the system")
|
||||
|
||||
work_item_id = Param.Int(-1, "specific work item id")
|
||||
num_work_ids = Param.Int(16, "Number of distinct work item types")
|
||||
work_begin_cpu_id_exit = Param.Int(-1,
|
||||
"work started on specific id, now exit simulation")
|
||||
work_begin_ckpt_count = Param.Counter(0,
|
||||
|
||||
@@ -417,6 +417,7 @@ workbegin(ThreadContext *tc, uint64_t workid, uint64_t threadid)
|
||||
tc->getCpuPtr()->workItemBegin();
|
||||
System *sys = tc->getSystemPtr();
|
||||
const System::Params *params = sys->params();
|
||||
sys->workItemBegin(threadid, workid);
|
||||
|
||||
DPRINTF(WorkItems, "Work Begin workid: %d, threadid %d\n", workid,
|
||||
threadid);
|
||||
@@ -473,6 +474,7 @@ workend(ThreadContext *tc, uint64_t workid, uint64_t threadid)
|
||||
tc->getCpuPtr()->workItemEnd();
|
||||
System *sys = tc->getSystemPtr();
|
||||
const System::Params *params = sys->params();
|
||||
sys->workItemEnd(threadid, workid);
|
||||
|
||||
DPRINTF(WorkItems, "Work End workid: %d, threadid %d\n", workid, threadid);
|
||||
|
||||
|
||||
@@ -1,4 +1,16 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2003-2006 The Regents of The University of Michigan
|
||||
* Copyright (c) 2011 Regents of the University of California
|
||||
* All rights reserved.
|
||||
@@ -43,6 +55,7 @@
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "debug/Loader.hh"
|
||||
#include "debug/WorkItems.hh"
|
||||
#include "kern/kernel_stats.hh"
|
||||
#include "mem/mem_object.hh"
|
||||
#include "mem/physical.hh"
|
||||
@@ -68,8 +81,9 @@ System::System(Params *p)
|
||||
memoryMode(p->mem_mode),
|
||||
workItemsBegin(0),
|
||||
workItemsEnd(0),
|
||||
numWorkIds(p->num_work_ids),
|
||||
_params(p),
|
||||
totalNumInsts(0),
|
||||
totalNumInsts(0),
|
||||
instEventQueue("system instruction-based event queue")
|
||||
{
|
||||
// add self to global system list
|
||||
@@ -158,6 +172,9 @@ System::~System()
|
||||
{
|
||||
delete kernelSymtab;
|
||||
delete kernel;
|
||||
|
||||
for (uint32_t j = 0; j < numWorkIds; j++)
|
||||
delete workItemStats[j];
|
||||
}
|
||||
|
||||
void
|
||||
@@ -319,6 +336,37 @@ System::unserialize(Checkpoint *cp, const string §ion)
|
||||
UNSERIALIZE_SCALAR(nextPID);
|
||||
}
|
||||
|
||||
void
|
||||
System::regStats()
|
||||
{
|
||||
for (uint32_t j = 0; j < numWorkIds ; j++) {
|
||||
workItemStats[j] = new Stats::Histogram();
|
||||
stringstream namestr;
|
||||
ccprintf(namestr, "work_item_type%d", j);
|
||||
workItemStats[j]->init(20)
|
||||
.name(name() + "." + namestr.str())
|
||||
.desc("Run time stat for" + namestr.str())
|
||||
.prereq(*workItemStats[j]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
System::workItemEnd(uint32_t tid, uint32_t workid)
|
||||
{
|
||||
std::pair<uint32_t,uint32_t> p(tid, workid);
|
||||
if (!lastWorkItemStarted.count(p))
|
||||
return;
|
||||
|
||||
Tick samp = curTick() - lastWorkItemStarted[p];
|
||||
DPRINTF(WorkItems, "Work item end: %d\t%d\t%lld\n", tid, workid, samp);
|
||||
|
||||
if (workid >= numWorkIds)
|
||||
fatal("Got workid greater than specified in system configuration\n");
|
||||
|
||||
workItemStats[workid]->sample(samp);
|
||||
lastWorkItemStarted.erase(p);
|
||||
}
|
||||
|
||||
void
|
||||
System::printSystems()
|
||||
{
|
||||
|
||||
@@ -157,14 +157,16 @@ class System : public SimObject
|
||||
Enums::MemoryMode memoryMode;
|
||||
uint64_t workItemsBegin;
|
||||
uint64_t workItemsEnd;
|
||||
uint32_t numWorkIds;
|
||||
std::vector<bool> activeCpus;
|
||||
|
||||
public:
|
||||
virtual void regStats();
|
||||
/**
|
||||
* Called by pseudo_inst to track the number of work items started by this
|
||||
* system.
|
||||
*/
|
||||
uint64_t
|
||||
uint64_t
|
||||
incWorkItemsBegin()
|
||||
{
|
||||
return ++workItemsBegin;
|
||||
@@ -198,6 +200,14 @@ class System : public SimObject
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
 * Record the current tick as the start time of work item (tid, workid),
 * replacing any start time already stored for that pair.
 */
inline void workItemBegin(uint32_t tid, uint32_t workid)
{
    const std::pair<uint32_t,uint32_t> key = std::make_pair(tid, workid);
    lastWorkItemStarted[key] = curTick();
}
|
||||
|
||||
void workItemEnd(uint32_t tid, uint32_t workid);
|
||||
|
||||
/**
|
||||
* Fix up an address used to match PCs for hooking simulator
|
||||
* events on to target function executions. See comment in
|
||||
@@ -285,6 +295,8 @@ class System : public SimObject
|
||||
public:
|
||||
Counter totalNumInsts;
|
||||
EventQueue instEventQueue;
|
||||
std::map<std::pair<uint32_t,uint32_t>, Tick> lastWorkItemStarted;
|
||||
std::map<uint32_t, Stats::Histogram*> workItemStats;
|
||||
|
||||
////////////////////////////////////////////
|
||||
//
|
||||
|
||||
@@ -500,7 +500,7 @@ egid=100
|
||||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
||||
@@ -3,11 +3,10 @@ Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing/si
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 16 2011 11:08:03
|
||||
gem5 started Nov 17 2011 13:09:16
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/o3-timing
|
||||
tests
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
||||
@@ -3,26 +3,26 @@
|
||||
sim_seconds 0.586294 # Number of seconds simulated
|
||||
sim_ticks 586294224000 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 112274 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 40595683 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 244844 # Number of bytes of host memory used
|
||||
host_seconds 14442.28 # Real time elapsed on the host
|
||||
host_inst_rate 115446 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 41742717 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 244900 # Number of bytes of host memory used
|
||||
host_seconds 14045.43 # Real time elapsed on the host
|
||||
sim_insts 1621493982 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 1172588449 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.BPredUnit.lookups 142448983 # Number of BP lookups
|
||||
system.cpu.BPredUnit.condPredicted 142448983 # Number of conditional branches predicted
|
||||
system.cpu.BPredUnit.lookups 142448982 # Number of BP lookups
|
||||
system.cpu.BPredUnit.condPredicted 142448982 # Number of conditional branches predicted
|
||||
system.cpu.BPredUnit.condIncorrect 7804844 # Number of conditional branches incorrect
|
||||
system.cpu.BPredUnit.BTBLookups 134509889 # Number of BTB lookups
|
||||
system.cpu.BPredUnit.BTBLookups 134509888 # Number of BTB lookups
|
||||
system.cpu.BPredUnit.BTBHits 133615988 # Number of BTB hits
|
||||
system.cpu.BPredUnit.BTBCorrect 0 # Number of correct BTB predictions (this stat may not work properly.
|
||||
system.cpu.BPredUnit.usedRAS 0 # Number of times the RAS was used to get a target.
|
||||
system.cpu.BPredUnit.RASInCorrect 0 # Number of incorrect RAS predictions.
|
||||
system.cpu.fetch.icacheStallCycles 143149229 # Number of cycles fetch is stalled on an Icache miss
|
||||
system.cpu.fetch.Insts 1143761055 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.Branches 142448983 # Number of branches that fetch encountered
|
||||
system.cpu.fetch.Insts 1143761054 # Number of instructions fetch has processed
|
||||
system.cpu.fetch.Branches 142448982 # Number of branches that fetch encountered
|
||||
system.cpu.fetch.predictedBranches 133615988 # Number of branches that fetch has predicted taken
|
||||
system.cpu.fetch.Cycles 330199440 # Number of cycles fetch has run and was not squashing or blocked
|
||||
system.cpu.fetch.SquashCycles 57554993 # Number of cycles fetch has spent squashing
|
||||
@@ -66,32 +66,32 @@ system.cpu.rename.RenamedInsts 2043122328 # Nu
|
||||
system.cpu.rename.ROBFullEvents 2634 # Number of times rename has blocked due to ROB full
|
||||
system.cpu.rename.IQFullEvents 278313629 # Number of times rename has blocked due to IQ full
|
||||
system.cpu.rename.LSQFullEvents 129499394 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.RenamedOperands 2031527324 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 4954653616 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 4954649396 # Number of integer rename lookups
|
||||
system.cpu.rename.RenamedOperands 2031527322 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 4954653611 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 4954649391 # Number of integer rename lookups
|
||||
system.cpu.rename.fp_rename_lookups 4220 # Number of floating rename lookups
|
||||
system.cpu.rename.CommittedMaps 1617994650 # Number of HB maps that are committed
|
||||
system.cpu.rename.UndoneMaps 413532674 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.UndoneMaps 413532672 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.serializingInsts 91 # count of serializing insts renamed
|
||||
system.cpu.rename.tempSerializingInsts 91 # count of temporary serializing insts renamed
|
||||
system.cpu.rename.skidInsts 793190427 # count of insts added to the skid buffer
|
||||
system.cpu.memDep0.insertedLoads 519090632 # Number of loads inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.insertedStores 226808407 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.conflictingLoads 354951645 # Number of conflicting loads.
|
||||
system.cpu.memDep0.conflictingStores 148937435 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 1986583518 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 216 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 1781630005 # Number of instructions issued
|
||||
system.cpu.memDep0.conflictingStores 148937436 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 1986583516 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 218 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 1781630004 # Number of instructions issued
|
||||
system.cpu.iq.iqSquashedInstsIssued 180825 # Number of squashed instructions issued
|
||||
system.cpu.iq.iqSquashedInstsExamined 364939190 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu.iq.iqSquashedOperandsExamined 670712331 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 166 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.iqSquashedOperandsExamined 670712329 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 168 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.issued_per_cycle::samples 1172439660 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::mean 1.519592 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::stdev 1.333662 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::underflows 0 0.00% 0.00% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::0 271921708 23.19% 23.19% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::1 416937500 35.56% 58.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::0 271921709 23.19% 23.19% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::1 416937499 35.56% 58.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::2 234725234 20.02% 78.77% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::3 156776493 13.37% 92.15% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 54385701 4.64% 96.79% # Number of insts issued each cycle
|
||||
@@ -138,7 +138,7 @@ system.cpu.iq.fu_full::MemWrite 148998 5.73% 100.00% # at
|
||||
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.FU_type_0::No_OpClass 26894248 1.51% 1.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 1102052870 61.86% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 1102052869 61.86% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntMult 0 0.00% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 63.37% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 63.37% # Type of FU issued
|
||||
@@ -171,17 +171,17 @@ system.cpu.iq.FU_type_0::MemRead 457985397 25.71% 89.07% # Ty
|
||||
system.cpu.iq.FU_type_0::MemWrite 194697490 10.93% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 1781630005 # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 1781630004 # Type of FU issued
|
||||
system.cpu.iq.rate 1.519399 # Inst issue rate
|
||||
system.cpu.iq.fu_busy_cnt 2598665 # FU busy when requested
|
||||
system.cpu.iq.fu_busy_rate 0.001459 # FU busy rate (busy events/executed inst)
|
||||
system.cpu.iq.int_inst_queue_reads 4738479065 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_reads 4738479063 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_writes 2351732069 # Number of integer instruction queue writes
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053766 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 1760053765 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.fp_inst_queue_reads 95 # Number of floating instruction queue reads
|
||||
system.cpu.iq.fp_inst_queue_writes 542 # Number of floating instruction queue writes
|
||||
system.cpu.iq.fp_inst_queue_wakeup_accesses 12 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu.iq.int_alu_accesses 1757334382 # Number of integer alu accesses
|
||||
system.cpu.iq.int_alu_accesses 1757334381 # Number of integer alu accesses
|
||||
system.cpu.iq.fp_alu_accesses 40 # Number of floating point alu accesses
|
||||
system.cpu.iew.lsq.thread0.forwLoads 205665909 # Number of loads that had data forwarded from stores
|
||||
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
@@ -208,7 +208,7 @@ system.cpu.iew.memOrderViolationEvents 216417 # Nu
|
||||
system.cpu.iew.predictedTakenIncorrect 4603219 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu.iew.predictedNotTakenIncorrect 3388875 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.branchMispredicts 7992094 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.iewExecutedInsts 1768232809 # Number of executed instructions
|
||||
system.cpu.iew.iewExecutedInsts 1768232808 # Number of executed instructions
|
||||
system.cpu.iew.iewExecLoadInsts 452047218 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 13397196 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.exec_swp 0 # number of swp insts executed
|
||||
@@ -217,8 +217,8 @@ system.cpu.iew.exec_refs 645919458 # nu
|
||||
system.cpu.iew.exec_branches 112169596 # Number of branches executed
|
||||
system.cpu.iew.exec_stores 193872240 # Number of stores executed
|
||||
system.cpu.iew.exec_rate 1.507974 # Inst execution rate
|
||||
system.cpu.iew.wb_sent 1766226830 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 1760053778 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_sent 1766226829 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 1760053777 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_producers 1336567337 # num instructions producing a value
|
||||
system.cpu.iew.wb_consumers 2003494286 # num instructions consuming a value
|
||||
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
@@ -268,9 +268,9 @@ system.cpu.cpi_total 0.723153 # CP
|
||||
system.cpu.ipc 1.382833 # IPC: Instructions Per Cycle
|
||||
system.cpu.ipc_total 1.382833 # IPC: Total IPC of All Threads
|
||||
system.cpu.int_regfile_reads 3273039620 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 1756091293 # number of integer regfile writes
|
||||
system.cpu.int_regfile_writes 1756091292 # number of integer regfile writes
|
||||
system.cpu.fp_regfile_reads 12 # number of floating regfile reads
|
||||
system.cpu.misc_regfile_reads 908871446 # number of misc regfile reads
|
||||
system.cpu.misc_regfile_reads 908871445 # number of misc regfile reads
|
||||
system.cpu.icache.replacements 12 # number of replacements
|
||||
system.cpu.icache.tagsinuse 810.394392 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 137025977 # Total number of references to valid blocks.
|
||||
|
||||
@@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
|
||||
type=System
|
||||
children=cpu membus physmem
|
||||
mem_mode=atomic
|
||||
memories=system.physmem
|
||||
physmem=system.physmem
|
||||
work_begin_ckpt_count=0
|
||||
work_begin_cpu_id_exit=-1
|
||||
@@ -61,12 +62,12 @@ type=ExeTracer
|
||||
[system.cpu.workload]
|
||||
type=LiveProcess
|
||||
cmd=gzip input.log 1
|
||||
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
|
||||
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
|
||||
egid=100
|
||||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
warn: Sockets disabled, not accepting gdb connections
|
||||
For more information see: http://www.m5sim.org/warn/d946bea6
|
||||
warn: instruction 'fnstcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
warn: instruction 'fldcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
hack: be nice to actually delete the event here
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
M5 Simulator System
|
||||
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simout
|
||||
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
Copyright (c) 2001-2008
|
||||
The Regents of The University of Michigan
|
||||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Apr 19 2011 12:22:33
|
||||
M5 started Apr 19 2011 12:22:36
|
||||
M5 executing on maize
|
||||
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-atomic
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-atomic
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
||||
@@ -1,34 +1,34 @@
|
||||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 3280168 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 202508 # Number of bytes of host memory used
|
||||
host_seconds 494.33 # Real time elapsed on the host
|
||||
host_tick_rate 1950088412 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
sim_seconds 0.963993 # Number of seconds simulated
|
||||
sim_ticks 963992704000 # Number of ticks simulated
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 1220339 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 725502264 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 234168 # Number of bytes of host memory used
|
||||
host_seconds 1328.72 # Real time elapsed on the host
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 1927985409 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_busy_cycles 1927985409 # Number of busy cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
||||
@@ -9,6 +9,7 @@ time_sync_spin_threshold=100000000
|
||||
type=System
|
||||
children=cpu membus physmem
|
||||
mem_mode=atomic
|
||||
memories=system.physmem
|
||||
physmem=system.physmem
|
||||
work_begin_ckpt_count=0
|
||||
work_begin_cpu_id_exit=-1
|
||||
@@ -164,12 +165,12 @@ type=ExeTracer
|
||||
[system.cpu.workload]
|
||||
type=LiveProcess
|
||||
cmd=gzip input.log 1
|
||||
cwd=build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
|
||||
cwd=build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
|
||||
egid=100
|
||||
env=
|
||||
errout=cerr
|
||||
euid=100
|
||||
executable=/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
executable=/scratch/nilay/GEM5/dist/m5/cpu2000/binaries/x86/linux/gzip
|
||||
gid=100
|
||||
input=cin
|
||||
max_stack_size=67108864
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
warn: Sockets disabled, not accepting gdb connections
|
||||
For more information see: http://www.m5sim.org/warn/d946bea6
|
||||
warn: instruction 'fnstcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
warn: instruction 'fldcw_Mw' unimplemented
|
||||
For more information see: http://www.m5sim.org/warn/437d5238
|
||||
hack: be nice to actually delete the event here
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
M5 Simulator System
|
||||
Redirecting stdout to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simout
|
||||
Redirecting stderr to build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
Copyright (c) 2001-2008
|
||||
The Regents of The University of Michigan
|
||||
All Rights Reserved
|
||||
|
||||
|
||||
M5 compiled Apr 19 2011 12:22:33
|
||||
M5 started Apr 19 2011 12:23:09
|
||||
M5 executing on maize
|
||||
command line: build/X86_SE/m5.fast -d build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/fast/long/00.gzip/x86/linux/simple-timing
|
||||
gem5 compiled Jan 9 2012 14:18:02
|
||||
gem5 started Jan 9 2012 14:29:08
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_SE/gem5.opt -d build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing -re tests/run.py build/X86_SE/tests/opt/long/00.gzip/x86/linux/simple-timing
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
spec_init
|
||||
|
||||
@@ -1,223 +1,223 @@
|
||||
|
||||
---------- Begin Simulation Statistics ----------
|
||||
host_inst_rate 2023797 # Simulator instruction rate (inst/s)
|
||||
host_mem_usage 210248 # Number of bytes of host memory used
|
||||
host_seconds 801.21 # Real time elapsed on the host
|
||||
host_tick_rate 2250658484 # Simulator tick rate (ticks/s)
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
sim_seconds 1.803259 # Number of seconds simulated
|
||||
sim_ticks 1803258587000 # Number of ticks simulated
|
||||
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
|
||||
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
|
||||
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
|
||||
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
|
||||
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
|
||||
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
|
||||
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.overall_hits 606786134 # number of overall hits
|
||||
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
|
||||
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
|
||||
system.cpu.dcache.overall_misses 442048 # number of overall misses
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.dcache.replacements 437952 # number of replacements
|
||||
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.writebacks 396372 # number of writebacks
|
||||
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
|
||||
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
|
||||
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
|
||||
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
|
||||
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
|
||||
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
|
||||
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
|
||||
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
|
||||
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.overall_hits 1186516018 # number of overall hits
|
||||
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
|
||||
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
|
||||
system.cpu.icache.overall_misses 722 # number of overall misses
|
||||
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 760773 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 846053445 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 242892 # Number of bytes of host memory used
|
||||
host_seconds 2131.38 # Real time elapsed on the host
|
||||
sim_insts 1621493983 # Number of instructions simulated
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.icache.replacements 4 # number of replacements
|
||||
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.tagsinuse 660.186297 # Cycle average of tags in use
|
||||
system.cpu.icache.total_refs 1186516018 # Total number of references to valid blocks.
|
||||
system.cpu.icache.sampled_refs 722 # Sample count of references to valid blocks.
|
||||
system.cpu.icache.avg_refs 1643373.986150 # Average number of references to valid blocks.
|
||||
system.cpu.icache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.icache.occ_blocks::0 660.186297 # Average occupied blocks per context
|
||||
system.cpu.icache.occ_percent::0 0.322357 # Average percentage of cache occupancy
|
||||
system.cpu.icache.ReadReq_hits 1186516018 # number of ReadReq hits
|
||||
system.cpu.icache.demand_hits 1186516018 # number of demand (read+write) hits
|
||||
system.cpu.icache.overall_hits 1186516018 # number of overall hits
|
||||
system.cpu.icache.ReadReq_misses 722 # number of ReadReq misses
|
||||
system.cpu.icache.demand_misses 722 # number of demand (read+write) misses
|
||||
system.cpu.icache.overall_misses 722 # number of overall misses
|
||||
system.cpu.icache.ReadReq_miss_latency 40432000 # number of ReadReq miss cycles
|
||||
system.cpu.icache.demand_miss_latency 40432000 # number of demand (read+write) miss cycles
|
||||
system.cpu.icache.overall_miss_latency 40432000 # number of overall miss cycles
|
||||
system.cpu.icache.ReadReq_accesses 1186516740 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.icache.demand_accesses 1186516740 # number of demand (read+write) accesses
|
||||
system.cpu.icache.overall_accesses 1186516740 # number of overall (read+write) accesses
|
||||
system.cpu.icache.ReadReq_miss_rate 0.000001 # miss rate for ReadReq accesses
|
||||
system.cpu.icache.demand_miss_rate 0.000001 # miss rate for demand accesses
|
||||
system.cpu.icache.overall_miss_rate 0.000001 # miss rate for overall accesses
|
||||
system.cpu.icache.ReadReq_avg_miss_latency 56000 # average ReadReq miss latency
|
||||
system.cpu.icache.demand_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.overall_avg_miss_latency 56000 # average overall miss latency
|
||||
system.cpu.icache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.icache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.icache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.icache.writebacks 0 # number of writebacks
|
||||
system.cpu.idle_fraction 0 # Percentage of idle cycles
|
||||
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
|
||||
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
|
||||
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.icache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.icache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.icache.ReadReq_mshr_misses 722 # number of ReadReq MSHR misses
|
||||
system.cpu.icache.demand_mshr_misses 722 # number of demand (read+write) MSHR misses
|
||||
system.cpu.icache.overall_mshr_misses 722 # number of overall MSHR misses
|
||||
system.cpu.icache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.icache.ReadReq_mshr_miss_latency 38266000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.icache.demand_mshr_miss_latency 38266000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_miss_latency 38266000 # number of overall MSHR miss cycles
|
||||
system.cpu.icache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.icache.ReadReq_mshr_miss_rate 0.000001 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.icache.demand_mshr_miss_rate 0.000001 # mshr miss rate for demand accesses
|
||||
system.cpu.icache.overall_mshr_miss_rate 0.000001 # mshr miss rate for overall accesses
|
||||
system.cpu.icache.ReadReq_avg_mshr_miss_latency 53000 # average ReadReq mshr miss latency
|
||||
system.cpu.icache.demand_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_miss_latency 53000 # average overall mshr miss latency
|
||||
system.cpu.icache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.icache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.icache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.icache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.dcache.replacements 437952 # number of replacements
|
||||
system.cpu.dcache.tagsinuse 4094.896939 # Cycle average of tags in use
|
||||
system.cpu.dcache.total_refs 606786134 # Total number of references to valid blocks.
|
||||
system.cpu.dcache.sampled_refs 442048 # Sample count of references to valid blocks.
|
||||
system.cpu.dcache.avg_refs 1372.670239 # Average number of references to valid blocks.
|
||||
system.cpu.dcache.warmup_cycle 778540000 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.dcache.occ_blocks::0 4094.896939 # Average occupied blocks per context
|
||||
system.cpu.dcache.occ_percent::0 0.999731 # Average percentage of cache occupancy
|
||||
system.cpu.dcache.ReadReq_hits 418844799 # number of ReadReq hits
|
||||
system.cpu.dcache.WriteReq_hits 187941335 # number of WriteReq hits
|
||||
system.cpu.dcache.demand_hits 606786134 # number of demand (read+write) hits
|
||||
system.cpu.dcache.overall_hits 606786134 # number of overall hits
|
||||
system.cpu.dcache.ReadReq_misses 197326 # number of ReadReq misses
|
||||
system.cpu.dcache.WriteReq_misses 244722 # number of WriteReq misses
|
||||
system.cpu.dcache.demand_misses 442048 # number of demand (read+write) misses
|
||||
system.cpu.dcache.overall_misses 442048 # number of overall misses
|
||||
system.cpu.dcache.ReadReq_miss_latency 4043270000 # number of ReadReq miss cycles
|
||||
system.cpu.dcache.WriteReq_miss_latency 5872734000 # number of WriteReq miss cycles
|
||||
system.cpu.dcache.demand_miss_latency 9916004000 # number of demand (read+write) miss cycles
|
||||
system.cpu.dcache.overall_miss_latency 9916004000 # number of overall miss cycles
|
||||
system.cpu.dcache.ReadReq_accesses 419042125 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.dcache.WriteReq_accesses 188186057 # number of WriteReq accesses(hits+misses)
|
||||
system.cpu.dcache.demand_accesses 607228182 # number of demand (read+write) accesses
|
||||
system.cpu.dcache.overall_accesses 607228182 # number of overall (read+write) accesses
|
||||
system.cpu.dcache.ReadReq_miss_rate 0.000471 # miss rate for ReadReq accesses
|
||||
system.cpu.dcache.WriteReq_miss_rate 0.001300 # miss rate for WriteReq accesses
|
||||
system.cpu.dcache.demand_miss_rate 0.000728 # miss rate for demand accesses
|
||||
system.cpu.dcache.overall_miss_rate 0.000728 # miss rate for overall accesses
|
||||
system.cpu.dcache.ReadReq_avg_miss_latency 20490.305383 # average ReadReq miss latency
|
||||
system.cpu.dcache.WriteReq_avg_miss_latency 23997.572756 # average WriteReq miss latency
|
||||
system.cpu.dcache.demand_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.overall_avg_miss_latency 22431.962140 # average overall miss latency
|
||||
system.cpu.dcache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.dcache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.dcache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.dcache.writebacks 396372 # number of writebacks
|
||||
system.cpu.dcache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.dcache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.dcache.ReadReq_mshr_misses 197326 # number of ReadReq MSHR misses
|
||||
system.cpu.dcache.WriteReq_mshr_misses 244722 # number of WriteReq MSHR misses
|
||||
system.cpu.dcache.demand_mshr_misses 442048 # number of demand (read+write) MSHR misses
|
||||
system.cpu.dcache.overall_mshr_misses 442048 # number of overall MSHR misses
|
||||
system.cpu.dcache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.dcache.ReadReq_mshr_miss_latency 3451292000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.dcache.WriteReq_mshr_miss_latency 5138568000 # number of WriteReq MSHR miss cycles
|
||||
system.cpu.dcache.demand_mshr_miss_latency 8589860000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_miss_latency 8589860000 # number of overall MSHR miss cycles
|
||||
system.cpu.dcache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.dcache.ReadReq_mshr_miss_rate 0.000471 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.dcache.WriteReq_mshr_miss_rate 0.001300 # mshr miss rate for WriteReq accesses
|
||||
system.cpu.dcache.demand_mshr_miss_rate 0.000728 # mshr miss rate for demand accesses
|
||||
system.cpu.dcache.overall_mshr_miss_rate 0.000728 # mshr miss rate for overall accesses
|
||||
system.cpu.dcache.ReadReq_avg_mshr_miss_latency 17490.305383 # average ReadReq mshr miss latency
|
||||
system.cpu.dcache.WriteReq_avg_mshr_miss_latency 20997.572756 # average WriteReq mshr miss latency
|
||||
system.cpu.dcache.demand_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_miss_latency 19431.962140 # average overall mshr miss latency
|
||||
system.cpu.dcache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.dcache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.dcache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.dcache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.replacements 71208 # number of replacements
|
||||
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.avg_refs 4.873826 # Average number of references to valid blocks.
|
||||
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.occ_blocks::0 1869.199731 # Average occupied blocks per context
|
||||
system.cpu.l2cache.occ_blocks::1 16187.723361 # Average occupied blocks per context
|
||||
system.cpu.l2cache.occ_percent::0 0.057043 # Average percentage of cache occupancy
|
||||
system.cpu.l2cache.occ_percent::1 0.494010 # Average percentage of cache occupancy
|
||||
system.cpu.l2cache.ReadReq_hits 166833 # number of ReadReq hits
|
||||
system.cpu.l2cache.Writeback_hits 396372 # number of Writeback hits
|
||||
system.cpu.l2cache.ReadExReq_hits 186469 # number of ReadExReq hits
|
||||
system.cpu.l2cache.demand_hits 353302 # number of demand (read+write) hits
|
||||
system.cpu.l2cache.overall_hits 353302 # number of overall hits
|
||||
system.cpu.l2cache.ReadReq_misses 31215 # number of ReadReq misses
|
||||
system.cpu.l2cache.ReadExReq_misses 58253 # number of ReadExReq misses
|
||||
system.cpu.l2cache.demand_misses 89468 # number of demand (read+write) misses
|
||||
system.cpu.l2cache.overall_misses 89468 # number of overall misses
|
||||
system.cpu.l2cache.ReadReq_miss_latency 1623180000 # number of ReadReq miss cycles
|
||||
system.cpu.l2cache.ReadExReq_miss_latency 3029156000 # number of ReadExReq miss cycles
|
||||
system.cpu.l2cache.demand_miss_latency 4652336000 # number of demand (read+write) miss cycles
|
||||
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
|
||||
system.cpu.l2cache.ReadReq_accesses 198048 # number of ReadReq accesses(hits+misses)
|
||||
system.cpu.l2cache.Writeback_accesses 396372 # number of Writeback accesses(hits+misses)
|
||||
system.cpu.l2cache.ReadExReq_accesses 244722 # number of ReadExReq accesses(hits+misses)
|
||||
system.cpu.l2cache.demand_accesses 442770 # number of demand (read+write) accesses
|
||||
system.cpu.l2cache.overall_accesses 442770 # number of overall (read+write) accesses
|
||||
system.cpu.l2cache.ReadReq_miss_rate 0.157613 # miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadExReq_miss_rate 0.238037 # miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.demand_miss_rate 0.202064 # miss rate for demand accesses
|
||||
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
|
||||
system.cpu.l2cache.ReadReq_avg_miss_latency 52000 # average ReadReq miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_miss_latency 52000 # average ReadExReq miss latency
|
||||
system.cpu.l2cache.demand_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.overall_avg_miss_latency 52000 # average overall miss latency
|
||||
system.cpu.l2cache.blocked_cycles::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked_cycles::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_mshrs 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.blocked::no_targets 0 # number of cycles access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_mshrs no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.avg_blocked_cycles::no_targets no_value # average number of cycles each access was blocked
|
||||
system.cpu.l2cache.fast_writes 0 # number of fast writes performed
|
||||
system.cpu.l2cache.cache_copies 0 # number of cache copies performed
|
||||
system.cpu.l2cache.writebacks 58007 # number of writebacks
|
||||
system.cpu.l2cache.demand_mshr_hits 0 # number of demand (read+write) MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.ReadReq_mshr_misses 31215 # number of ReadReq MSHR misses
|
||||
system.cpu.l2cache.ReadExReq_mshr_misses 58253 # number of ReadExReq MSHR misses
|
||||
system.cpu.l2cache.demand_mshr_misses 89468 # number of demand (read+write) MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_latency 1248600000 # number of ReadReq MSHR miss cycles
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_latency 2330120000 # number of ReadExReq MSHR miss cycles
|
||||
system.cpu.l2cache.demand_mshr_miss_latency 3578720000 # number of demand (read+write) MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.l2cache.ReadReq_mshr_miss_rate 0.157613 # mshr miss rate for ReadReq accesses
|
||||
system.cpu.l2cache.ReadExReq_mshr_miss_rate 0.238037 # mshr miss rate for ReadExReq accesses
|
||||
system.cpu.l2cache.demand_mshr_miss_rate 0.202064 # mshr miss rate for demand accesses
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.ReadReq_avg_mshr_miss_latency 40000 # average ReadReq mshr miss latency
|
||||
system.cpu.l2cache.ReadExReq_avg_mshr_miss_latency 40000 # average ReadExReq mshr miss latency
|
||||
system.cpu.l2cache.demand_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_miss_latency 40000 # average overall mshr miss latency
|
||||
system.cpu.l2cache.overall_avg_mshr_uncacheable_latency no_value # average overall mshr uncacheable latency
|
||||
system.cpu.l2cache.overall_hits 353302 # number of overall hits
|
||||
system.cpu.l2cache.overall_miss_latency 4652336000 # number of overall miss cycles
|
||||
system.cpu.l2cache.overall_miss_rate 0.202064 # miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_misses 89468 # number of overall misses
|
||||
system.cpu.l2cache.overall_mshr_hits 0 # number of overall MSHR hits
|
||||
system.cpu.l2cache.overall_mshr_miss_latency 3578720000 # number of overall MSHR miss cycles
|
||||
system.cpu.l2cache.overall_mshr_miss_rate 0.202064 # mshr miss rate for overall accesses
|
||||
system.cpu.l2cache.overall_mshr_misses 89468 # number of overall MSHR misses
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_latency 0 # number of overall MSHR uncacheable cycles
|
||||
system.cpu.l2cache.overall_mshr_uncacheable_misses 0 # number of overall MSHR uncacheable misses
|
||||
system.cpu.l2cache.replacements 71208 # number of replacements
|
||||
system.cpu.l2cache.sampled_refs 86793 # Sample count of references to valid blocks.
|
||||
system.cpu.l2cache.mshr_cap_events 0 # number of times MSHR cap was activated
|
||||
system.cpu.l2cache.soft_prefetch_mshr_full 0 # number of mshr full events for SW prefetching instrutions
|
||||
system.cpu.l2cache.tagsinuse 18056.923092 # Cycle average of tags in use
|
||||
system.cpu.l2cache.total_refs 423014 # Total number of references to valid blocks.
|
||||
system.cpu.l2cache.warmup_cycle 0 # Cycle when the warmup percentage was hit.
|
||||
system.cpu.l2cache.writebacks 58007 # number of writebacks
|
||||
system.cpu.not_idle_fraction 1 # Percentage of non-idle cycles
|
||||
system.cpu.numCycles 3606517174 # number of cpu cycles simulated
|
||||
system.cpu.numWorkItemsCompleted 0 # number of work items this cpu completed
|
||||
system.cpu.numWorkItemsStarted 0 # number of work items this cpu started
|
||||
system.cpu.num_busy_cycles 3606517174 # Number of busy cycles
|
||||
system.cpu.num_conditional_control_insts 99478861 # number of instructions that are conditional controls
|
||||
system.cpu.num_fp_alu_accesses 0 # Number of float alu accesses
|
||||
system.cpu.num_fp_insts 0 # number of float instructions
|
||||
system.cpu.num_fp_register_reads 0 # number of times the floating registers were read
|
||||
system.cpu.num_fp_register_writes 0 # number of times the floating registers were written
|
||||
system.cpu.num_func_calls 0 # number of times a function call or return occured
|
||||
system.cpu.num_idle_cycles 0 # Number of idle cycles
|
||||
system.cpu.num_insts 1621493983 # Number of instructions executed
|
||||
system.cpu.num_int_alu_accesses 1621354493 # Number of integer alu accesses
|
||||
system.cpu.num_int_insts 1621354493 # number of integer instructions
|
||||
system.cpu.num_int_register_reads 3953866002 # number of times the integer registers were read
|
||||
system.cpu.num_int_register_writes 1617994650 # number of times the integer registers were written
|
||||
system.cpu.num_load_insts 419042125 # Number of load instructions
|
||||
system.cpu.num_mem_refs 607228182 # number of memory refs
|
||||
system.cpu.num_store_insts 188186057 # Number of store instructions
|
||||
system.cpu.workload.num_syscalls 48 # Number of system calls
|
||||
system.cpu.l2cache.no_allocate_misses 0 # Number of misses that were no-allocate
|
||||
|
||||
---------- End Simulation Statistics ----------
|
||||
|
||||
@@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
|
||||
type=LinuxArmSystem
|
||||
children=bridge cf0 cpu0 cpu1 intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
|
||||
boot_cpu_frequency=500
|
||||
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
|
||||
boot_loader=/dist/m5/system/binaries/boot.arm
|
||||
boot_loader_mem=system.nvmem
|
||||
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
|
||||
flags_addr=268435504
|
||||
gic_cpu_addr=520093952
|
||||
init_param=0
|
||||
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
load_addr_mask=268435455
|
||||
machine_type=RealView_PBX
|
||||
mem_mode=timing
|
||||
memories=system.nvmem system.physmem
|
||||
midr_regval=890224640
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
symbolfile=
|
||||
@@ -63,7 +64,7 @@ table_size=65536
|
||||
|
||||
[system.cf0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
|
||||
image_file=/dist/m5/system/disks/linux-arm-ael.img
|
||||
read_only=true
|
||||
|
||||
[system.cpu0]
|
||||
@@ -1495,6 +1496,7 @@ port=system.l2c.cpu_side system.cpu0.icache.mem_side system.cpu0.dcache.mem_side
|
||||
|
||||
[system.vncserver]
|
||||
type=VncServer
|
||||
frame_capture=false
|
||||
number=0
|
||||
port=5900
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simout
|
||||
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:32:34
|
||||
gem5 started Nov 22 2011 02:00:14
|
||||
gem5 executing on u200540-lin
|
||||
gem5 compiled Jan 8 2012 22:12:58
|
||||
gem5 started Jan 9 2012 03:33:38
|
||||
gem5 executing on zizzer
|
||||
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3-dual
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: Using bootloader at address 0x80000000
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 2582494395500 because m5_exit instruction encountered
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
sim_seconds 2.582494 # Number of seconds simulated
|
||||
sim_ticks 2582494395500 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 86259 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2789337609 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 380504 # Number of bytes of host memory used
|
||||
host_seconds 925.85 # Real time elapsed on the host
|
||||
host_inst_rate 65512 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2118472138 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 384260 # Number of bytes of host memory used
|
||||
host_seconds 1219.04 # Real time elapsed on the host
|
||||
sim_insts 79862069 # Number of instructions simulated
|
||||
system.l2c.replacements 132200 # number of replacements
|
||||
system.l2c.tagsinuse 27582.989225 # Cycle average of tags in use
|
||||
@@ -312,12 +312,12 @@ system.cpu0.rename.ROBFullEvents 1483 # Nu
|
||||
system.cpu0.rename.IQFullEvents 580883 # Number of times rename has blocked due to IQ full
|
||||
system.cpu0.rename.LSQFullEvents 3149232 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu0.rename.FullRegisterEvents 205 # Number of times there has been no free registers
|
||||
system.cpu0.rename.RenamedOperands 54779836 # Number of destination operands rename has renamed
|
||||
system.cpu0.rename.RenamedOperands 54779837 # Number of destination operands rename has renamed
|
||||
system.cpu0.rename.RenameLookups 247536349 # Number of register rename lookups that rename has made
|
||||
system.cpu0.rename.int_rename_lookups 247487579 # Number of integer rename lookups
|
||||
system.cpu0.rename.fp_rename_lookups 48770 # Number of floating rename lookups
|
||||
system.cpu0.rename.CommittedMaps 41441157 # Number of HB maps that are committed
|
||||
system.cpu0.rename.UndoneMaps 13338678 # Number of HB maps that are undone due to squashing
|
||||
system.cpu0.rename.UndoneMaps 13338679 # Number of HB maps that are undone due to squashing
|
||||
system.cpu0.rename.serializingInsts 828868 # count of serializing insts renamed
|
||||
system.cpu0.rename.tempSerializingInsts 763855 # count of temporary serializing insts renamed
|
||||
system.cpu0.rename.skidInsts 8500592 # count of insts added to the skid buffer
|
||||
@@ -325,13 +325,13 @@ system.cpu0.memDep0.insertedLoads 11770384 # Nu
|
||||
system.cpu0.memDep0.insertedStores 7686805 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu0.memDep0.conflictingLoads 1443183 # Number of conflicting loads.
|
||||
system.cpu0.memDep0.conflictingStores 1570137 # Number of conflicting stores.
|
||||
system.cpu0.iq.iqInstsAdded 50961906 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu0.iq.iqNonSpecInstsAdded 1297751 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu0.iq.iqInstsIssued 80276175 # Number of instructions issued
|
||||
system.cpu0.iq.iqInstsAdded 50961905 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu0.iq.iqNonSpecInstsAdded 1297752 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu0.iq.iqInstsIssued 80276174 # Number of instructions issued
|
||||
system.cpu0.iq.iqSquashedInstsIssued 137636 # Number of squashed instructions issued
|
||||
system.cpu0.iq.iqSquashedInstsExamined 9888896 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu0.iq.iqSquashedOperandsExamined 22816025 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu0.iq.iqSquashedNonSpecRemoved 253323 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu0.iq.iqSquashedNonSpecRemoved 253324 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu0.iq.issued_per_cycle::samples 109741052 # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::mean 0.731505 # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::stdev 1.440076 # Number of insts issued each cycle
|
||||
@@ -340,8 +340,8 @@ system.cpu0.iq.issued_per_cycle::0 80125799 73.01% 73.01% # Nu
|
||||
system.cpu0.iq.issued_per_cycle::1 10111373 9.21% 82.23% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::2 4133530 3.77% 85.99% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::3 3177611 2.90% 88.89% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::4 9954077 9.07% 97.96% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::5 1265280 1.15% 99.11% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::4 9954078 9.07% 97.96% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::5 1265279 1.15% 99.11% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::6 670333 0.61% 99.72% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::7 224189 0.20% 99.93% # Number of insts issued each cycle
|
||||
system.cpu0.iq.issued_per_cycle::8 78860 0.07% 100.00% # Number of insts issued each cycle
|
||||
@@ -384,7 +384,7 @@ system.cpu0.iq.fu_full::MemWrite 285533 3.56% 100.00% # at
|
||||
system.cpu0.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu0.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu0.iq.FU_type_0::No_OpClass 88461 0.11% 0.11% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntAlu 29731482 37.04% 37.15% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntAlu 29731481 37.04% 37.15% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntMult 62351 0.08% 37.22% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IntDiv 0 0.00% 37.22% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::FloatAdd 0 0.00% 37.22% # Type of FU issued
|
||||
@@ -417,17 +417,17 @@ system.cpu0.iq.FU_type_0::MemRead 43135014 53.73% 90.96% # Ty
|
||||
system.cpu0.iq.FU_type_0::MemWrite 7257159 9.04% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::total 80276175 # Type of FU issued
|
||||
system.cpu0.iq.FU_type_0::total 80276174 # Type of FU issued
|
||||
system.cpu0.iq.rate 0.227757 # Inst issue rate
|
||||
system.cpu0.iq.fu_busy_cnt 8028360 # FU busy when requested
|
||||
system.cpu0.iq.fu_busy_rate 0.100009 # FU busy rate (busy events/executed inst)
|
||||
system.cpu0.iq.int_inst_queue_reads 278513866 # Number of integer instruction queue reads
|
||||
system.cpu0.iq.int_inst_queue_reads 278513864 # Number of integer instruction queue reads
|
||||
system.cpu0.iq.int_inst_queue_writes 62161443 # Number of integer instruction queue writes
|
||||
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668616 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu0.iq.int_inst_queue_wakeup_accesses 46668615 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu0.iq.fp_inst_queue_reads 11568 # Number of floating instruction queue reads
|
||||
system.cpu0.iq.fp_inst_queue_writes 6980 # Number of floating instruction queue writes
|
||||
system.cpu0.iq.fp_inst_queue_wakeup_accesses 5172 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu0.iq.int_alu_accesses 88210043 # Number of integer alu accesses
|
||||
system.cpu0.iq.int_alu_accesses 88210042 # Number of integer alu accesses
|
||||
system.cpu0.iq.fp_alu_accesses 6031 # Number of floating point alu accesses
|
||||
system.cpu0.iew.lsq.thread0.forwLoads 399886 # Number of loads that had data forwarded from stores
|
||||
system.cpu0.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
@@ -447,14 +447,14 @@ system.cpu0.iew.iewDispatchedInsts 52433539 # Nu
|
||||
system.cpu0.iew.iewDispSquashedInsts 243567 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu0.iew.iewDispLoadInsts 11770384 # Number of dispatched load instructions
|
||||
system.cpu0.iew.iewDispStoreInsts 7686805 # Number of dispatched store instructions
|
||||
system.cpu0.iew.iewDispNonSpecInsts 865739 # Number of dispatched non-speculative instructions
|
||||
system.cpu0.iew.iewDispNonSpecInsts 865740 # Number of dispatched non-speculative instructions
|
||||
system.cpu0.iew.iewIQFullEvents 62160 # Number of times the IQ has become full, causing a stall
|
||||
system.cpu0.iew.iewLSQFullEvents 5553 # Number of times the LSQ has become full, causing a stall
|
||||
system.cpu0.iew.memOrderViolationEvents 20554 # Number of memory order violations
|
||||
system.cpu0.iew.predictedTakenIncorrect 507509 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu0.iew.predictedNotTakenIncorrect 136100 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu0.iew.branchMispredicts 643609 # Number of branch mispredicts detected at execute
|
||||
system.cpu0.iew.iewExecutedInsts 79551296 # Number of executed instructions
|
||||
system.cpu0.iew.iewExecutedInsts 79551295 # Number of executed instructions
|
||||
system.cpu0.iew.iewExecLoadInsts 42843907 # Number of load instructions executed
|
||||
system.cpu0.iew.iewExecSquashedInsts 724879 # Number of squashed instructions skipped in execute
|
||||
system.cpu0.iew.exec_swp 0 # number of swp insts executed
|
||||
@@ -463,8 +463,8 @@ system.cpu0.iew.exec_refs 50011427 # nu
|
||||
system.cpu0.iew.exec_branches 6433542 # Number of branches executed
|
||||
system.cpu0.iew.exec_stores 7167520 # Number of stores executed
|
||||
system.cpu0.iew.exec_rate 0.225700 # Inst execution rate
|
||||
system.cpu0.iew.wb_sent 79133798 # cumulative count of insts sent to commit
|
||||
system.cpu0.iew.wb_count 46673788 # cumulative count of insts written-back
|
||||
system.cpu0.iew.wb_sent 79133797 # cumulative count of insts sent to commit
|
||||
system.cpu0.iew.wb_count 46673787 # cumulative count of insts written-back
|
||||
system.cpu0.iew.wb_producers 24793926 # num instructions producing a value
|
||||
system.cpu0.iew.wb_consumers 46078393 # num instructions consuming a value
|
||||
system.cpu0.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
@@ -514,8 +514,8 @@ system.cpu0.cpi 8.431852 # CP
|
||||
system.cpu0.cpi_total 8.431852 # CPI: Total CPI of All Threads
|
||||
system.cpu0.ipc 0.118598 # IPC: Instructions Per Cycle
|
||||
system.cpu0.ipc_total 0.118598 # IPC: Total IPC of All Threads
|
||||
system.cpu0.int_regfile_reads 354175082 # number of integer regfile reads
|
||||
system.cpu0.int_regfile_writes 46137252 # number of integer regfile writes
|
||||
system.cpu0.int_regfile_reads 354175079 # number of integer regfile reads
|
||||
system.cpu0.int_regfile_writes 46137251 # number of integer regfile writes
|
||||
system.cpu0.fp_regfile_reads 4205 # number of floating regfile reads
|
||||
system.cpu0.fp_regfile_writes 1348 # number of floating regfile writes
|
||||
system.cpu0.misc_regfile_reads 65629786 # number of misc regfile reads
|
||||
|
||||
Binary file not shown.
@@ -9,18 +9,19 @@ time_sync_spin_threshold=100000000
|
||||
type=LinuxArmSystem
|
||||
children=bridge cf0 cpu intrctrl iobus iocache l2c membus nvmem physmem realview terminal toL2Bus vncserver
|
||||
boot_cpu_frequency=500
|
||||
boot_loader=/projects/pd/randd/dist/binaries/boot.arm
|
||||
boot_loader=/dist/m5/system/binaries/boot.arm
|
||||
boot_loader_mem=system.nvmem
|
||||
boot_osflags=earlyprintk console=ttyAMA0 lpj=19988480 norandmaps rw loglevel=8 mem=128MB root=/dev/sda1
|
||||
flags_addr=268435504
|
||||
gic_cpu_addr=520093952
|
||||
init_param=0
|
||||
kernel=/projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
kernel=/dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
load_addr_mask=268435455
|
||||
machine_type=RealView_PBX
|
||||
mem_mode=timing
|
||||
memories=system.nvmem system.physmem
|
||||
midr_regval=890224640
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
symbolfile=
|
||||
@@ -63,7 +64,7 @@ table_size=65536
|
||||
|
||||
[system.cf0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-arm-ael.img
|
||||
image_file=/dist/m5/system/disks/linux-arm-ael.img
|
||||
read_only=true
|
||||
|
||||
[system.cpu]
|
||||
@@ -1041,6 +1042,7 @@ port=system.l2c.cpu_side system.cpu.icache.mem_side system.cpu.dcache.mem_side s
|
||||
|
||||
[system.vncserver]
|
||||
type=VncServer
|
||||
frame_capture=false
|
||||
number=0
|
||||
port=5900
|
||||
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
Redirecting stdout to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simout
|
||||
Redirecting stderr to build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3/simerr
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:32:34
|
||||
gem5 started Nov 22 2011 02:00:08
|
||||
gem5 executing on u200540-lin
|
||||
gem5 compiled Jan 8 2012 22:12:58
|
||||
gem5 started Jan 9 2012 03:32:35
|
||||
gem5 executing on zizzer
|
||||
command line: build/ARM_FS/gem5.opt -d build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3 -re tests/run.py build/ARM_FS/tests/opt/long/10.linux-boot/arm/linux/realview-o3
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: kernel located at: /dist/m5/system/binaries/vmlinux.arm.smp.fb.2.6.38.8
|
||||
info: Using bootloader at address 0x80000000
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 2503566110500 because m5_exit instruction encountered
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
sim_seconds 2.503566 # Number of seconds simulated
|
||||
sim_ticks 2503566110500 # Number of ticks simulated
|
||||
sim_freq 1000000000000 # Frequency of simulated ticks
|
||||
host_inst_rate 84156 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2743719152 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 380536 # Number of bytes of host memory used
|
||||
host_seconds 912.47 # Real time elapsed on the host
|
||||
host_inst_rate 72389 # Simulator instruction rate (inst/s)
|
||||
host_tick_rate 2360079964 # Simulator tick rate (ticks/s)
|
||||
host_mem_usage 384076 # Number of bytes of host memory used
|
||||
host_seconds 1060.80 # Real time elapsed on the host
|
||||
sim_insts 76790007 # Number of instructions simulated
|
||||
system.l2c.replacements 119509 # number of replacements
|
||||
system.l2c.tagsinuse 25929.897253 # Cycle average of tags in use
|
||||
@@ -270,12 +270,12 @@ system.cpu.rename.ROBFullEvents 4400 # Nu
|
||||
system.cpu.rename.IQFullEvents 914485 # Number of times rename has blocked due to IQ full
|
||||
system.cpu.rename.LSQFullEvents 3979731 # Number of times rename has blocked due to LSQ full
|
||||
system.cpu.rename.FullRegisterEvents 42252 # Number of times there has been no free registers
|
||||
system.cpu.rename.RenamedOperands 118358542 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenamedOperands 118358543 # Number of destination operands rename has renamed
|
||||
system.cpu.rename.RenameLookups 523323093 # Number of register rename lookups that rename has made
|
||||
system.cpu.rename.int_rename_lookups 523225639 # Number of integer rename lookups
|
||||
system.cpu.rename.fp_rename_lookups 97454 # Number of floating rename lookups
|
||||
system.cpu.rename.CommittedMaps 77492718 # Number of HB maps that are committed
|
||||
system.cpu.rename.UndoneMaps 40865823 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.UndoneMaps 40865824 # Number of HB maps that are undone due to squashing
|
||||
system.cpu.rename.serializingInsts 1204637 # count of serializing insts renamed
|
||||
system.cpu.rename.tempSerializingInsts 1098724 # count of temporary serializing insts renamed
|
||||
system.cpu.rename.skidInsts 12304657 # count of insts added to the skid buffer
|
||||
@@ -283,13 +283,13 @@ system.cpu.memDep0.insertedLoads 21982315 # Nu
|
||||
system.cpu.memDep0.insertedStores 14168730 # Number of stores inserted to the mem dependence unit.
|
||||
system.cpu.memDep0.conflictingLoads 1896802 # Number of conflicting loads.
|
||||
system.cpu.memDep0.conflictingStores 2281380 # Number of conflicting stores.
|
||||
system.cpu.iq.iqInstsAdded 102860212 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 1874615 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 126873317 # Number of instructions issued
|
||||
system.cpu.iq.iqInstsAdded 102860211 # Number of instructions added to the IQ (excludes non-spec)
|
||||
system.cpu.iq.iqNonSpecInstsAdded 1874616 # Number of non-speculative instructions added to the IQ
|
||||
system.cpu.iq.iqInstsIssued 126873316 # Number of instructions issued
|
||||
system.cpu.iq.iqSquashedInstsIssued 252471 # Number of squashed instructions issued
|
||||
system.cpu.iq.iqSquashedInstsExamined 26973483 # Number of squashed instructions iterated over during squash; mainly for profiling
|
||||
system.cpu.iq.iqSquashedOperandsExamined 72956952 # Number of squashed operands that are examined and possibly removed from graph
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 374922 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.iqSquashedNonSpecRemoved 374923 # Number of squashed non-spec instructions that were removed
|
||||
system.cpu.iq.issued_per_cycle::samples 155542524 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::mean 0.815683 # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::stdev 1.505358 # Number of insts issued each cycle
|
||||
@@ -298,8 +298,8 @@ system.cpu.iq.issued_per_cycle::0 108919716 70.03% 70.03% # Nu
|
||||
system.cpu.iq.issued_per_cycle::1 15115277 9.72% 79.74% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::2 7538109 4.85% 84.59% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::3 6517896 4.19% 88.78% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 12766128 8.21% 96.99% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::5 2735747 1.76% 98.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::4 12766129 8.21% 96.99% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::5 2735746 1.76% 98.75% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::6 1395145 0.90% 99.64% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::7 422031 0.27% 99.91% # Number of insts issued each cycle
|
||||
system.cpu.iq.issued_per_cycle::8 132475 0.09% 100.00% # Number of insts issued each cycle
|
||||
@@ -342,7 +342,7 @@ system.cpu.iq.fu_full::MemWrite 436630 4.91% 100.00% # at
|
||||
system.cpu.iq.fu_full::IprAccess 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.fu_full::InstPrefetch 0 0.00% 100.00% # attempts to use FU when none available
|
||||
system.cpu.iq.FU_type_0::No_OpClass 106530 0.08% 0.08% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 60069483 47.35% 47.43% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntAlu 60069482 47.35% 47.43% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntMult 96615 0.08% 47.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IntDiv 0 0.00% 47.51% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::FloatAdd 0 0.00% 47.51% # Type of FU issued
|
||||
@@ -375,17 +375,17 @@ system.cpu.iq.FU_type_0::MemRead 53942685 42.52% 90.02% # Ty
|
||||
system.cpu.iq.FU_type_0::MemWrite 12655733 9.98% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::IprAccess 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::InstPrefetch 0 0.00% 100.00% # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 126873317 # Type of FU issued
|
||||
system.cpu.iq.FU_type_0::total 126873316 # Type of FU issued
|
||||
system.cpu.iq.rate 0.305048 # Inst issue rate
|
||||
system.cpu.iq.fu_busy_cnt 8900311 # FU busy when requested
|
||||
system.cpu.iq.fu_busy_rate 0.070151 # FU busy rate (busy events/executed inst)
|
||||
system.cpu.iq.int_inst_queue_reads 418533130 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_reads 418533128 # Number of integer instruction queue reads
|
||||
system.cpu.iq.int_inst_queue_writes 131726191 # Number of integer instruction queue writes
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 87292109 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.int_inst_queue_wakeup_accesses 87292108 # Number of integer instruction queue wakeup accesses
|
||||
system.cpu.iq.fp_inst_queue_reads 24017 # Number of floating instruction queue reads
|
||||
system.cpu.iq.fp_inst_queue_writes 13690 # Number of floating instruction queue writes
|
||||
system.cpu.iq.fp_inst_queue_wakeup_accesses 10446 # Number of floating instruction queue wakeup accesses
|
||||
system.cpu.iq.int_alu_accesses 135654306 # Number of integer alu accesses
|
||||
system.cpu.iq.int_alu_accesses 135654305 # Number of integer alu accesses
|
||||
system.cpu.iq.fp_alu_accesses 12792 # Number of floating point alu accesses
|
||||
system.cpu.iew.lsq.thread0.forwLoads 614767 # Number of loads that had data forwarded from stores
|
||||
system.cpu.iew.lsq.thread0.invAddrLoads 0 # Number of loads ignored due to an invalid address
|
||||
@@ -405,14 +405,14 @@ system.cpu.iew.iewDispatchedInsts 104949442 # Nu
|
||||
system.cpu.iew.iewDispSquashedInsts 473979 # Number of squashed instructions skipped by dispatch
|
||||
system.cpu.iew.iewDispLoadInsts 21982315 # Number of dispatched load instructions
|
||||
system.cpu.iew.iewDispStoreInsts 14168730 # Number of dispatched store instructions
|
||||
system.cpu.iew.iewDispNonSpecInsts 1228030 # Number of dispatched non-speculative instructions
|
||||
system.cpu.iew.iewDispNonSpecInsts 1228031 # Number of dispatched non-speculative instructions
|
||||
system.cpu.iew.iewIQFullEvents 85187 # Number of times the IQ has become full, causing a stall
|
||||
system.cpu.iew.iewLSQFullEvents 7556 # Number of times the LSQ has become full, causing a stall
|
||||
system.cpu.iew.memOrderViolationEvents 32657 # Number of memory order violations
|
||||
system.cpu.iew.predictedTakenIncorrect 850397 # Number of branches that were predicted taken incorrectly
|
||||
system.cpu.iew.predictedNotTakenIncorrect 257130 # Number of branches that were predicted not taken incorrectly
|
||||
system.cpu.iew.branchMispredicts 1107527 # Number of branch mispredicts detected at execute
|
||||
system.cpu.iew.iewExecutedInsts 123429780 # Number of executed instructions
|
||||
system.cpu.iew.iewExecutedInsts 123429779 # Number of executed instructions
|
||||
system.cpu.iew.iewExecLoadInsts 52914304 # Number of load instructions executed
|
||||
system.cpu.iew.iewExecSquashedInsts 3443537 # Number of squashed instructions skipped in execute
|
||||
system.cpu.iew.exec_swp 0 # number of swp insts executed
|
||||
@@ -421,8 +421,8 @@ system.cpu.iew.exec_refs 65401525 # nu
|
||||
system.cpu.iew.exec_branches 11705842 # Number of branches executed
|
||||
system.cpu.iew.exec_stores 12487221 # Number of stores executed
|
||||
system.cpu.iew.exec_rate 0.296769 # Inst execution rate
|
||||
system.cpu.iew.wb_sent 121771134 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 87302555 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_sent 121771133 # cumulative count of insts sent to commit
|
||||
system.cpu.iew.wb_count 87302554 # cumulative count of insts written-back
|
||||
system.cpu.iew.wb_producers 47043389 # num instructions producing a value
|
||||
system.cpu.iew.wb_consumers 86638668 # num instructions consuming a value
|
||||
system.cpu.iew.wb_penalized 0 # number of instrctions required to write to 'other' IQ
|
||||
@@ -472,8 +472,8 @@ system.cpu.cpi 5.416227 # CP
|
||||
system.cpu.cpi_total 5.416227 # CPI: Total CPI of All Threads
|
||||
system.cpu.ipc 0.184630 # IPC: Instructions Per Cycle
|
||||
system.cpu.ipc_total 0.184630 # IPC: Total IPC of All Threads
|
||||
system.cpu.int_regfile_reads 559625789 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 89694790 # number of integer regfile writes
|
||||
system.cpu.int_regfile_reads 559625786 # number of integer regfile reads
|
||||
system.cpu.int_regfile_writes 89694789 # number of integer regfile writes
|
||||
system.cpu.fp_regfile_reads 8322 # number of floating regfile reads
|
||||
system.cpu.fp_regfile_writes 2832 # number of floating regfile writes
|
||||
system.cpu.misc_regfile_reads 137256850 # number of misc regfile reads
|
||||
|
||||
Binary file not shown.
@@ -15,10 +15,11 @@ e820_table=system.e820_table
|
||||
init_param=0
|
||||
intel_mp_pointer=system.intel_mp_pointer
|
||||
intel_mp_table=system.intel_mp_table
|
||||
kernel=/projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
|
||||
kernel=/scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
|
||||
load_addr_mask=18446744073709551615
|
||||
mem_mode=timing
|
||||
memories=system.physmem
|
||||
num_work_ids=16
|
||||
physmem=system.physmem
|
||||
readfile=tests/halt.sh
|
||||
smbios_table=system.smbios_table
|
||||
@@ -1301,7 +1302,7 @@ table_size=65536
|
||||
|
||||
[system.pc.south_bridge.ide.disks0.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-x86.img
|
||||
image_file=/scratch/nilay/GEM5/system/disks/linux-x86.img
|
||||
read_only=true
|
||||
|
||||
[system.pc.south_bridge.ide.disks1]
|
||||
@@ -1321,7 +1322,7 @@ table_size=65536
|
||||
|
||||
[system.pc.south_bridge.ide.disks1.image.child]
|
||||
type=RawDiskImage
|
||||
image_file=/projects/pd/randd/dist/disks/linux-bigswap2.img
|
||||
image_file=/scratch/nilay/GEM5/system/disks/linux-bigswap2.img
|
||||
read_only=true
|
||||
|
||||
[system.pc.south_bridge.int_lines0]
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
gem5 Simulator System. http://gem5.org
|
||||
gem5 is copyrighted software; use the --copyright option for details.
|
||||
|
||||
gem5 compiled Nov 21 2011 16:24:08
|
||||
gem5 started Nov 21 2011 23:30:30
|
||||
gem5 executing on u200540-lin
|
||||
command line: build/X86_FS/gem5.opt -d build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/opt/long/10.linux-boot/x86/linux/pc-o3-timing
|
||||
gem5 compiled Jan 9 2012 20:47:38
|
||||
gem5 started Jan 9 2012 21:13:16
|
||||
gem5 executing on ribera.cs.wisc.edu
|
||||
command line: build/X86_FS/gem5.fast -d build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing -re tests/run.py build/X86_FS/tests/fast/long/10.linux-boot/x86/linux/pc-o3-timing
|
||||
warning: add_child('terminal'): child 'terminal' already has parent
|
||||
Global frequency set at 1000000000000 ticks per second
|
||||
info: kernel located at: /projects/pd/randd/dist/binaries/x86_64-vmlinux-2.6.22.9
|
||||
0: rtc: Real-time clock set to Sun Jan 1 00:00:00 2012
|
||||
info: kernel located at: /scratch/nilay/GEM5/system/binaries/x86_64-vmlinux-2.6.22.9
|
||||
info: Entering event queue @ 0. Starting simulation...
|
||||
Exiting @ tick 5145286546500 because m5_exit instruction encountered
|
||||
Exiting @ tick 5161177988500 because m5_exit instruction encountered
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user