Merge ktlim@zamp:./local/clean/o3-merge/m5
into zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem
configs/boot/micro_memlat.rcS:
configs/boot/micro_tlblat.rcS:
src/arch/alpha/ev5.cc:
src/arch/alpha/isa/decoder.isa:
src/arch/alpha/isa_traits.hh:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.hh:
src/cpu/checker/cpu.hh:
src/cpu/checker/cpu_impl.hh:
src/cpu/o3/alpha/cpu_impl.hh:
src/cpu/o3/alpha/params.hh:
src/cpu/o3/checker_builder.cc:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_impl.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/checker_builder.cc:
src/cpu/ozone/cpu.hh:
src/cpu/ozone/cpu_impl.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/front_end_impl.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_back_end_impl.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/lw_lsq_impl.hh:
src/cpu/ozone/thread_state.hh:
src/cpu/simple/base.cc:
src/cpu/simple_thread.cc:
src/cpu/simple_thread.hh:
src/cpu/thread_state.hh:
src/dev/ide_disk.cc:
src/python/m5/objects/O3CPU.py:
src/python/m5/objects/Root.py:
src/python/m5/objects/System.py:
src/sim/pseudo_inst.cc:
src/sim/pseudo_inst.hh:
src/sim/system.hh:
util/m5/m5.c:
Hand merge.
--HG--
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/o3/checker_builder.cc
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/ozone/checker_builder.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/simple/cpu.cc => src/cpu/simple/base.cc
rename : cpu/cpu_exec_context.cc => src/cpu/simple_thread.cc
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/O3CPU.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/system.hh => src/sim/system.hh
extra : convert_revision : 135d90e43f6cea89f9460ba4e23f4b0b85886e7d
This commit is contained in:
@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
|
||||
Param<int> cpu_id;
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
SimObjectParam<AlphaDTB *> dtb;
|
||||
Param<Tick> profile;
|
||||
#else
|
||||
SimObjectVectorParam<Process *> workload;
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread;
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
Param<unsigned> cachePorts;
|
||||
|
||||
@@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
|
||||
INIT_PARAM(cpu_id, "processor ID"),
|
||||
INIT_PARAM(itb, "Instruction translation buffer"),
|
||||
INIT_PARAM(dtb, "Data translation buffer"),
|
||||
INIT_PARAM(profile, ""),
|
||||
#else
|
||||
INIT_PARAM(workload, "Processes to run"),
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
|
||||
"Terminate when all threads have reached this load"
|
||||
"count",
|
||||
0),
|
||||
INIT_PARAM_DFLT(stats_reset_inst,
|
||||
"blah",
|
||||
0),
|
||||
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
|
||||
|
||||
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
|
||||
|
||||
@@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
|
||||
params->cpu_id = cpu_id;
|
||||
params->itb = itb;
|
||||
params->dtb = dtb;
|
||||
params->profile = profile;
|
||||
#else
|
||||
params->workload = workload;
|
||||
#endif // FULL_SYSTEM
|
||||
@@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU)
|
||||
params->max_insts_all_threads = max_insts_all_threads;
|
||||
params->max_loads_any_thread = max_loads_any_thread;
|
||||
params->max_loads_all_threads = max_loads_all_threads;
|
||||
params->stats_reset_inst = stats_reset_inst;
|
||||
params->progress_interval = progress_interval;
|
||||
|
||||
//
|
||||
// Caches
|
||||
|
||||
@@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
|
||||
Param<Counter> max_insts_all_threads;
|
||||
Param<Counter> max_loads_any_thread;
|
||||
Param<Counter> max_loads_all_threads;
|
||||
Param<Counter> stats_reset_inst;
|
||||
Param<Tick> progress_interval;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
SimObjectParam<AlphaITB *> itb;
|
||||
@@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
|
||||
|
||||
Param<bool> defer_registration;
|
||||
Param<bool> exitOnError;
|
||||
Param<bool> updateOnError;
|
||||
Param<bool> warnOnlyOnLoadError;
|
||||
Param<bool> function_trace;
|
||||
Param<Tick> function_trace_start;
|
||||
@@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
|
||||
"terminate when any thread reaches this load count"),
|
||||
INIT_PARAM(max_loads_all_threads,
|
||||
"terminate when all threads have reached this load count"),
|
||||
INIT_PARAM(stats_reset_inst,
|
||||
"blah"),
|
||||
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
|
||||
|
||||
#if FULL_SYSTEM
|
||||
INIT_PARAM(itb, "Instruction TLB"),
|
||||
@@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
|
||||
|
||||
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
|
||||
INIT_PARAM(exitOnError, "exit on error"),
|
||||
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
|
||||
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
|
||||
"result errors", false),
|
||||
INIT_PARAM(function_trace, "Enable function trace"),
|
||||
@@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker)
|
||||
params->max_insts_all_threads = 0;
|
||||
params->max_loads_any_thread = 0;
|
||||
params->max_loads_all_threads = 0;
|
||||
params->stats_reset_inst = 0;
|
||||
params->exitOnError = exitOnError;
|
||||
params->updateOnError = updateOnError;
|
||||
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
|
||||
params->deferRegistration = defer_registration;
|
||||
params->functionTrace = function_trace;
|
||||
@@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker)
|
||||
temp = max_insts_all_threads;
|
||||
temp = max_loads_any_thread;
|
||||
temp = max_loads_all_threads;
|
||||
temp = stats_reset_inst;
|
||||
Tick temp2 = progress_interval;
|
||||
params->progress_interval = 0;
|
||||
temp2++;
|
||||
|
||||
#if FULL_SYSTEM
|
||||
params->itb = itb;
|
||||
|
||||
@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
||||
|
||||
// Generate trap squash event.
|
||||
generateTrapEvent(tid);
|
||||
|
||||
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
|
||||
return false;
|
||||
}
|
||||
|
||||
updateComInstStats(head_inst);
|
||||
|
||||
#if FULL_SYSTEM
|
||||
if (thread[tid]->profile) {
|
||||
// bool usermode =
|
||||
// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
|
||||
// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
|
||||
thread[tid]->profilePC = head_inst->readPC();
|
||||
ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
|
||||
head_inst->staticInst);
|
||||
|
||||
if (node)
|
||||
thread[tid]->profileNode = node;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (head_inst->traceData) {
|
||||
head_inst->traceData->setFetchSeq(head_inst->seqNum);
|
||||
head_inst->traceData->setCPSeq(thread[tid]->numInst);
|
||||
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
||||
head_inst->renamedDestRegIdx(i));
|
||||
}
|
||||
|
||||
if (head_inst->isCopy())
|
||||
panic("Should not commit any copy instructions!");
|
||||
|
||||
// Finally clear the head ROB entry.
|
||||
rob->retireHead(tid);
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "config/use_checker.hh"
|
||||
|
||||
#if FULL_SYSTEM
|
||||
#include "cpu/quiesce_event.hh"
|
||||
#include "sim/system.hh"
|
||||
#else
|
||||
#include "sim/process.hh"
|
||||
@@ -793,6 +794,8 @@ template <class Impl>
|
||||
unsigned int
|
||||
FullO3CPU<Impl>::drain(Event *drain_event)
|
||||
{
|
||||
DPRINTF(O3CPU, "Switching out\n");
|
||||
BaseCPU::switchOut(_sampler);
|
||||
drainCount = 0;
|
||||
fetch.drain();
|
||||
decode.drain();
|
||||
@@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut()
|
||||
{
|
||||
fetch.switchOut();
|
||||
rename.switchOut();
|
||||
iew.switchOut();
|
||||
commit.switchOut();
|
||||
instList.clear();
|
||||
while (!removeList.empty()) {
|
||||
@@ -930,6 +934,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||
tickEvent.schedule(curTick);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::serialize(std::ostream &os)
|
||||
{
|
||||
BaseCPU::serialize(os);
|
||||
nameOut(os, csprintf("%s.tickEvent", name()));
|
||||
tickEvent.serialize(os);
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// write out the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
for (int i = 0; i < thread.size(); i++) {
|
||||
nameOut(os, csprintf("%s.xc.%i", name(), i));
|
||||
temp.copyXC(thread[i]->getXCProxy());
|
||||
temp.serialize(os);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
|
||||
{
|
||||
BaseCPU::unserialize(cp, section);
|
||||
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
|
||||
|
||||
// Use SimpleThread's ability to checkpoint to make it easier to
|
||||
// read in the registers. Also make this static so it doesn't
|
||||
// get instantiated multiple times (causes a panic in statistics).
|
||||
static CPUExecContext temp;
|
||||
|
||||
for (int i = 0; i < thread.size(); i++) {
|
||||
temp.copyXC(thread[i]->getXCProxy());
|
||||
temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
|
||||
thread[i]->getXCProxy()->copyArchRegs(temp.getProxy());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
FullO3CPU<Impl>::readIntReg(int reg_idx)
|
||||
|
||||
@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
|
||||
wroteToTimeBuffer = false;
|
||||
_status = Inactive;
|
||||
switchedOut = false;
|
||||
interruptPending = false;
|
||||
branchPred.takeOverFrom();
|
||||
}
|
||||
|
||||
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
|
||||
unsigned flags = 0;
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
if (cacheBlocked || (interruptPending && flags == 0)) {
|
||||
if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
|
||||
// Hold off fetch from getting new instructions when:
|
||||
// Cache is blocked, or
|
||||
// while an interrupt is pending and we're not in PAL mode, or
|
||||
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||
fetch_PC = next_PC;
|
||||
|
||||
if (instruction->isQuiesce()) {
|
||||
warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
|
||||
curTick);
|
||||
// warn("%lli: Quiesce instruction encountered, halting fetch!",
|
||||
// curTick);
|
||||
fetchStatus[tid] = QuiescePending;
|
||||
++numInst;
|
||||
status_change = true;
|
||||
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||
fetchStatus[tid] = TrapPending;
|
||||
status_change = true;
|
||||
|
||||
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
|
||||
// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
|
||||
#else // !FULL_SYSTEM
|
||||
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
|
||||
#endif // FULL_SYSTEM
|
||||
|
||||
@@ -216,6 +216,7 @@ class DefaultIEW
|
||||
if (++wbOutstanding == wbMax)
|
||||
ableToIssue = false;
|
||||
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
|
||||
assert(wbOutstanding <= wbMax);
|
||||
#ifdef DEBUG
|
||||
wbList.insert(sn);
|
||||
#endif
|
||||
@@ -226,6 +227,7 @@ class DefaultIEW
|
||||
if (wbOutstanding-- == wbMax)
|
||||
ableToIssue = true;
|
||||
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
|
||||
assert(wbOutstanding >= 0);
|
||||
#ifdef DEBUG
|
||||
assert(wbList.find(sn) != wbList.end());
|
||||
wbList.erase(sn);
|
||||
@@ -450,7 +452,9 @@ class DefaultIEW
|
||||
unsigned wbCycle;
|
||||
|
||||
/** Number of instructions in flight that will writeback. */
|
||||
unsigned wbOutstanding;
|
||||
|
||||
/** Number of instructions in flight that will writeback. */
|
||||
int wbOutstanding;
|
||||
|
||||
/** Writeback width. */
|
||||
unsigned wbWidth;
|
||||
@@ -507,6 +511,8 @@ class DefaultIEW
|
||||
Stats::Scalar<> iewExecutedInsts;
|
||||
/** Stat for total number of executed load instructions. */
|
||||
Stats::Vector<> iewExecLoadInsts;
|
||||
/** Stat for total number of executed store instructions. */
|
||||
// Stats::Scalar<> iewExecStoreInsts;
|
||||
/** Stat for total number of squashed instructions skipped at execute. */
|
||||
Stats::Scalar<> iewExecSquashedInsts;
|
||||
/** Number of executed software prefetches. */
|
||||
|
||||
@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
|
||||
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
|
||||
|
||||
iewExecutedInsts
|
||||
.name(name() + ".EXEC:insts")
|
||||
.name(name() + ".iewExecutedInsts")
|
||||
.desc("Number of executed instructions");
|
||||
|
||||
iewExecLoadInsts
|
||||
.init(cpu->number_of_threads)
|
||||
.name(name() + ".EXEC:loads")
|
||||
.name(name() + ".iewExecLoadInsts")
|
||||
.desc("Number of load instructions executed")
|
||||
.flags(total);
|
||||
|
||||
iewExecSquashedInsts
|
||||
.name(name() + ".EXEC:squashedInsts")
|
||||
.name(name() + ".iewExecSquashedInsts")
|
||||
.desc("Number of squashed instructions skipped in execute");
|
||||
|
||||
iewExecutedSwp
|
||||
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
|
||||
{
|
||||
// Clear any state.
|
||||
switchedOut = true;
|
||||
assert(insts[0].empty());
|
||||
assert(skidBuffer[0].empty());
|
||||
|
||||
instQueue.switchOut();
|
||||
ldstQueue.switchOut();
|
||||
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
|
||||
|
||||
updateLSQNextCycle = false;
|
||||
|
||||
// @todo: Fix hardcoded number
|
||||
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
|
||||
issueToExecQueue.advance();
|
||||
}
|
||||
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
|
||||
wbNumInst = 0;
|
||||
}
|
||||
|
||||
assert((wbCycle * wbWidth + wbNumInst) < wbMax);
|
||||
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
|
||||
}
|
||||
|
||||
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
|
||||
wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
|
||||
// Add finished instruction to queue to commit.
|
||||
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
|
||||
(*iewQueue)[wbCycle].size++;
|
||||
@@ -901,6 +904,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
|
||||
{
|
||||
while (!insts[tid].empty()) {
|
||||
if (insts[tid].front()->isLoad() ||
|
||||
insts[tid].front()->isStore() ) {
|
||||
toRename->iewInfo[tid].dispatchedToLSQ++;
|
||||
}
|
||||
|
||||
toRename->iewInfo[tid].dispatched++;
|
||||
|
||||
insts[tid].pop();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::wakeCPU()
|
||||
@@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts()
|
||||
// event adds the instruction to the queue to commit
|
||||
fault = ldstQueue.executeLoad(inst);
|
||||
} else if (inst->isStore()) {
|
||||
ldstQueue.executeStore(inst);
|
||||
fault = ldstQueue.executeStore(inst);
|
||||
|
||||
// If the store had a fault then it may not have a mem req
|
||||
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
|
||||
if (!inst->isStoreConditional() && fault == NoFault) {
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
} else if (fault != NoFault) {
|
||||
// If the instruction faulted, then we need to send it along to commit
|
||||
// without the instruction completing.
|
||||
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
inst->setExecuted();
|
||||
|
||||
instToCommit(inst);
|
||||
activityThisCycle();
|
||||
}
|
||||
|
||||
// Store conditionals will mark themselves as
|
||||
@@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts()
|
||||
// E.g. Uncached loads have not actually executed when they
|
||||
// are first sent to commit. Instead commit must tell the LSQ
|
||||
// when it's ready to execute the uncached load.
|
||||
if (!inst->isSquashed() && inst->isExecuted()) {
|
||||
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
|
||||
int dependents = instQueue.wakeDependents(inst);
|
||||
|
||||
for (int i = 0; i < inst->numDestRegs(); i++) {
|
||||
|
||||
@@ -479,13 +479,13 @@ class InstructionQueue
|
||||
/** Distribution of number of instructions in the queue.
|
||||
* @todo: Need to create struct to track the entry time for each
|
||||
* instruction. */
|
||||
Stats::VectorDistribution<> queueResDist;
|
||||
// Stats::VectorDistribution<> queueResDist;
|
||||
/** Distribution of the number of instructions issued. */
|
||||
Stats::Distribution<> numIssuedDist;
|
||||
/** Distribution of the cycles it takes to issue an instruction.
|
||||
* @todo: Need to create struct to track the ready time for each
|
||||
* instruction. */
|
||||
Stats::VectorDistribution<> issueDelayDist;
|
||||
// Stats::VectorDistribution<> issueDelayDist;
|
||||
|
||||
/** Number of times an instruction could not be issued because a
|
||||
* FU was busy.
|
||||
|
||||
@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
|
||||
.name(name() + ".iqSquashedNonSpecRemoved")
|
||||
.desc("Number of squashed non-spec instructions that were removed")
|
||||
.prereq(iqSquashedNonSpecRemoved);
|
||||
|
||||
/*
|
||||
queueResDist
|
||||
.init(Num_OpClasses, 0, 99, 2)
|
||||
.name(name() + ".IQ:residence:")
|
||||
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
|
||||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
queueResDist.subname(i, opClassStrings[i]);
|
||||
}
|
||||
*/
|
||||
numIssuedDist
|
||||
.init(0,totalWidth,1)
|
||||
.name(name() + ".ISSUE:issued_per_cycle")
|
||||
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
|
||||
//
|
||||
// How long did instructions for a particular FU type wait prior to issue
|
||||
//
|
||||
|
||||
/*
|
||||
issueDelayDist
|
||||
.init(Num_OpClasses,0,99,2)
|
||||
.name(name() + ".ISSUE:")
|
||||
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
|
||||
subname << opClassStrings[i] << "_delay";
|
||||
issueDelayDist.subname(i, subname.str());
|
||||
}
|
||||
|
||||
*/
|
||||
issueRate
|
||||
.name(name() + ".ISSUE:rate")
|
||||
.desc("Inst issue rate")
|
||||
@@ -385,8 +386,16 @@ template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::switchOut()
|
||||
{
|
||||
/*
|
||||
if (!instList[0].empty() || (numEntries != freeEntries) ||
|
||||
!readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
|
||||
dumpInsts();
|
||||
// assert(0);
|
||||
}
|
||||
*/
|
||||
resetState();
|
||||
dependGraph.reset();
|
||||
instsToExecute.clear();
|
||||
switchedOut = true;
|
||||
for (int i = 0; i < numThreads; ++i) {
|
||||
memDepUnit[i].switchOut();
|
||||
@@ -642,9 +651,12 @@ template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
|
||||
{
|
||||
DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
|
||||
// The CPU could have been sleeping until this op completed (*extremely*
|
||||
// long latency op). Wake it if it was. This may be overkill.
|
||||
if (isSwitchedOut()) {
|
||||
DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
|
||||
inst->seqNum);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
(squashed_inst->isMemRef() &&
|
||||
!squashed_inst->memOpDone)) {
|
||||
|
||||
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
|
||||
"squashed.\n",
|
||||
tid, squashed_inst->seqNum, squashed_inst->readPC());
|
||||
|
||||
// Remove the instruction from the dependency list.
|
||||
if (!squashed_inst->isNonSpeculative() &&
|
||||
!squashed_inst->isStoreConditional() &&
|
||||
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
|
||||
++iqSquashedOperandsExamined;
|
||||
}
|
||||
} else {
|
||||
} else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
|
||||
NonSpecMapIt ns_inst_it =
|
||||
nonSpecInsts.find(squashed_inst->seqNum);
|
||||
assert(ns_inst_it != nonSpecInsts.end());
|
||||
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
|
||||
count[squashed_inst->threadNumber]--;
|
||||
|
||||
++freeEntries;
|
||||
|
||||
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
|
||||
"squashed.\n",
|
||||
tid, squashed_inst->seqNum, squashed_inst->readPC());
|
||||
}
|
||||
|
||||
instList[tid].erase(squash_it--);
|
||||
|
||||
@@ -165,6 +165,16 @@ LSQ<Impl>::regStats()
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::regStats()
|
||||
{
|
||||
//Initialize LSQs
|
||||
for (int tid=0; tid < numThreads; tid++) {
|
||||
thread[tid].regStats();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)
|
||||
|
||||
@@ -407,20 +407,9 @@ class LSQUnit {
|
||||
// Will also need how many read/write ports the Dcache has. Or keep track
|
||||
// of that in stage that is one level up, and only call executeLoad/Store
|
||||
// the appropriate number of times.
|
||||
|
||||
/** Total number of loads forwaded from LSQ stores. */
|
||||
Stats::Scalar<> lsqForwLoads;
|
||||
|
||||
/** Total number of loads ignored due to invalid addresses. */
|
||||
Stats::Scalar<> invAddrLoads;
|
||||
|
||||
/** Total number of squashed loads. */
|
||||
Stats::Scalar<> lsqSquashedLoads;
|
||||
|
||||
/** Total number of responses from the memory system that are
|
||||
* ignored due to the instruction already being squashed. */
|
||||
Stats::Scalar<> lsqIgnoredResponses;
|
||||
|
||||
/** Total number of squashed stores. */
|
||||
Stats::Scalar<> lsqSquashedStores;
|
||||
|
||||
|
||||
@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
|
||||
.name(name() + ".ignoredResponses")
|
||||
.desc("Number of memory responses ignored because the instruction is squashed");
|
||||
|
||||
lsqMemOrderViolation
|
||||
.name(name() + ".memOrderViolation")
|
||||
.desc("Number of memory ordering violations");
|
||||
|
||||
lsqSquashedStores
|
||||
.name(name() + ".squashedStores")
|
||||
.desc("Number of stores squashed");
|
||||
@@ -220,8 +224,10 @@ void
|
||||
LSQUnit<Impl>::switchOut()
|
||||
{
|
||||
switchedOut = true;
|
||||
for (int i = 0; i < loadQueue.size(); ++i)
|
||||
for (int i = 0; i < loadQueue.size(); ++i) {
|
||||
assert(!loadQueue[i]);
|
||||
loadQueue[i] = NULL;
|
||||
}
|
||||
|
||||
assert(storesToWB == 0);
|
||||
}
|
||||
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
|
||||
if (load_fault != NoFault) {
|
||||
// Send this instruction to commit, also make sure iew stage
|
||||
// realizes there is activity.
|
||||
// Mark it as executed unless it is an uncached load that
|
||||
// needs to hit the head of commit.
|
||||
if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
|
||||
inst->setExecuted();
|
||||
}
|
||||
iewStage->instToCommit(inst);
|
||||
iewStage->activityThisCycle();
|
||||
}
|
||||
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
|
||||
// A load incorrectly passed this store. Squash and refetch.
|
||||
// For now return a fault to show that it was unsuccessful.
|
||||
memDepViolator = loadQueue[load_idx];
|
||||
++lsqMemOrderViolation;
|
||||
|
||||
return genMachineCheckFault();
|
||||
}
|
||||
|
||||
@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::switchOut()
|
||||
{
|
||||
assert(instList[0].empty());
|
||||
assert(instsToReplay.empty());
|
||||
assert(memDepHash.empty());
|
||||
// Clear any state.
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||
instList[i].clear();
|
||||
|
||||
@@ -417,6 +417,8 @@ class DefaultRename
|
||||
/** The maximum skid buffer size. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
PhysRegIndex maxPhysicalRegs;
|
||||
|
||||
/** Enum to record the source of a structure full stall. Can come from
|
||||
* either ROB, IQ, LSQ, and it is priortized in that order.
|
||||
*/
|
||||
|
||||
@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
|
||||
commitToRenameDelay(params->commitToRenameDelay),
|
||||
renameWidth(params->renameWidth),
|
||||
commitWidth(params->commitWidth),
|
||||
numThreads(params->numberOfThreads)
|
||||
numThreads(params->numberOfThreads),
|
||||
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
|
||||
// Put the renamed physical register back on the free list.
|
||||
freeList->addReg(hb_it->newPhysReg);
|
||||
|
||||
// Be sure to mark its register as ready if it's a misc register.
|
||||
if (hb_it->newPhysReg >= maxPhysicalRegs) {
|
||||
scoreboard->setReg(hb_it->newPhysReg);
|
||||
}
|
||||
|
||||
historyBuffer[i].erase(hb_it++);
|
||||
}
|
||||
insts[i].clear();
|
||||
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
|
||||
// Put the renamed physical register back on the free list.
|
||||
freeList->addReg(hb_it->newPhysReg);
|
||||
|
||||
// Be sure to mark its register as ready if it's a misc register.
|
||||
if (hb_it->newPhysReg >= maxPhysicalRegs) {
|
||||
scoreboard->setReg(hb_it->newPhysReg);
|
||||
}
|
||||
|
||||
historyBuffer[tid].erase(hb_it++);
|
||||
|
||||
++renameUndoneMaps;
|
||||
|
||||
@@ -31,8 +31,11 @@
|
||||
#ifndef __CPU_O3_THREAD_STATE_HH__
|
||||
#define __CPU_O3_THREAD_STATE_HH__
|
||||
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class Event;
|
||||
class Process;
|
||||
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
|
||||
#if FULL_SYSTEM
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num)
|
||||
: ThreadState(-1, _thread_num),
|
||||
inSyscall(0), trapPending(0)
|
||||
{ }
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
{
|
||||
if (cpu->params->profile) {
|
||||
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<O3ThreadState,
|
||||
&O3ThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
}
|
||||
#else
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
|
||||
MemObject *mem)
|
||||
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
|
||||
/** Handles the syscall. */
|
||||
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
|
||||
#endif
|
||||
|
||||
#if FULL_SYSTEM
|
||||
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
profile->dump(xcProxy, *os);
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_THREAD_STATE_HH__
|
||||
|
||||
@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||
for (int i = 0; i < localPredictorSize; ++i)
|
||||
localCtrs[i].setBits(localCtrBits);
|
||||
|
||||
localPredictorMask = floorPow2(localPredictorSize) - 1;
|
||||
|
||||
if (!isPowerOf2(localHistoryTableSize)) {
|
||||
fatal("Invalid local history table size!\n");
|
||||
}
|
||||
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||
//Lookup in the local predictor to get its branch prediction
|
||||
local_history_idx = calcLocHistIdx(branch_addr);
|
||||
local_predictor_idx = localHistoryTable[local_history_idx]
|
||||
& localHistoryMask;
|
||||
& localPredictorMask;
|
||||
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
|
||||
|
||||
//Lookup in the global predictor to get its branch prediction
|
||||
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||
bp_history = (void *)history;
|
||||
|
||||
assert(globalHistory < globalPredictorSize &&
|
||||
local_history_idx < localPredictorSize);
|
||||
local_history_idx < localHistoryTableSize &&
|
||||
local_predictor_idx < localPredictorSize);
|
||||
|
||||
// Commented code is for doing speculative update of counters and
|
||||
// all histories.
|
||||
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||
// Get the local predictor's current prediction
|
||||
local_history_idx = calcLocHistIdx(branch_addr);
|
||||
local_predictor_hist = localHistoryTable[local_history_idx];
|
||||
local_predictor_idx = local_predictor_hist & localHistoryMask;
|
||||
local_predictor_idx = local_predictor_hist & localPredictorMask;
|
||||
|
||||
// Update the choice predictor to tell it which one was correct if
|
||||
// there was a prediction.
|
||||
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||
}
|
||||
|
||||
assert(globalHistory < globalPredictorSize &&
|
||||
local_history_idx < localHistoryTableSize &&
|
||||
local_predictor_idx < localPredictorSize);
|
||||
|
||||
// Update the counters and local history with the proper
|
||||
|
||||
@@ -159,6 +159,9 @@ class TournamentBP
|
||||
/** Size of the local predictor. */
|
||||
unsigned localPredictorSize;
|
||||
|
||||
/** Mask to get the proper index bits into the predictor. */
|
||||
unsigned localPredictorMask;
|
||||
|
||||
/** Number of bits of the local predictor's counters. */
|
||||
unsigned localCtrBits;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user