Merge ktlim@zamp:./local/clean/o3-merge/m5

into  zamp.eecs.umich.edu:/z/ktlim2/clean/o3-merge/newmem

configs/boot/micro_memlat.rcS:
configs/boot/micro_tlblat.rcS:
src/arch/alpha/ev5.cc:
src/arch/alpha/isa/decoder.isa:
src/arch/alpha/isa_traits.hh:
src/cpu/base.cc:
src/cpu/base.hh:
src/cpu/base_dyn_inst.hh:
src/cpu/checker/cpu.hh:
src/cpu/checker/cpu_impl.hh:
src/cpu/o3/alpha/cpu_impl.hh:
src/cpu/o3/alpha/params.hh:
src/cpu/o3/checker_builder.cc:
src/cpu/o3/commit_impl.hh:
src/cpu/o3/cpu.cc:
src/cpu/o3/decode_impl.hh:
src/cpu/o3/fetch_impl.hh:
src/cpu/o3/iew.hh:
src/cpu/o3/iew_impl.hh:
src/cpu/o3/inst_queue.hh:
src/cpu/o3/lsq.hh:
src/cpu/o3/lsq_impl.hh:
src/cpu/o3/lsq_unit.hh:
src/cpu/o3/lsq_unit_impl.hh:
src/cpu/o3/regfile.hh:
src/cpu/o3/rename_impl.hh:
src/cpu/o3/thread_state.hh:
src/cpu/ozone/checker_builder.cc:
src/cpu/ozone/cpu.hh:
src/cpu/ozone/cpu_impl.hh:
src/cpu/ozone/front_end.hh:
src/cpu/ozone/front_end_impl.hh:
src/cpu/ozone/lw_back_end.hh:
src/cpu/ozone/lw_back_end_impl.hh:
src/cpu/ozone/lw_lsq.hh:
src/cpu/ozone/lw_lsq_impl.hh:
src/cpu/ozone/thread_state.hh:
src/cpu/simple/base.cc:
src/cpu/simple_thread.cc:
src/cpu/simple_thread.hh:
src/cpu/thread_state.hh:
src/dev/ide_disk.cc:
src/python/m5/objects/O3CPU.py:
src/python/m5/objects/Root.py:
src/python/m5/objects/System.py:
src/sim/pseudo_inst.cc:
src/sim/pseudo_inst.hh:
src/sim/system.hh:
util/m5/m5.c:
    Hand merge.

--HG--
rename : arch/alpha/ev5.cc => src/arch/alpha/ev5.cc
rename : arch/alpha/freebsd/system.cc => src/arch/alpha/freebsd/system.cc
rename : arch/alpha/isa/decoder.isa => src/arch/alpha/isa/decoder.isa
rename : arch/alpha/isa/mem.isa => src/arch/alpha/isa/mem.isa
rename : arch/alpha/isa_traits.hh => src/arch/alpha/isa_traits.hh
rename : arch/alpha/linux/system.cc => src/arch/alpha/linux/system.cc
rename : arch/alpha/system.cc => src/arch/alpha/system.cc
rename : arch/alpha/tru64/system.cc => src/arch/alpha/tru64/system.cc
rename : cpu/base.cc => src/cpu/base.cc
rename : cpu/base.hh => src/cpu/base.hh
rename : cpu/base_dyn_inst.hh => src/cpu/base_dyn_inst.hh
rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh
rename : cpu/checker/cpu.cc => src/cpu/checker/cpu_impl.hh
rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha/cpu_builder.cc
rename : cpu/checker/o3_cpu_builder.cc => src/cpu/o3/checker_builder.cc
rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh
rename : cpu/o3/cpu.cc => src/cpu/o3/cpu.cc
rename : cpu/o3/fetch_impl.hh => src/cpu/o3/fetch_impl.hh
rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh
rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh
rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh
rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh
rename : cpu/o3/lsq_impl.hh => src/cpu/o3/lsq_impl.hh
rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh
rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh
rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh
rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh
rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh
rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh
rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc
rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh
rename : cpu/checker/cpu_builder.cc => src/cpu/ozone/checker_builder.cc
rename : cpu/ozone/cpu.hh => src/cpu/ozone/cpu.hh
rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc
rename : cpu/ozone/cpu_impl.hh => src/cpu/ozone/cpu_impl.hh
rename : cpu/ozone/front_end.hh => src/cpu/ozone/front_end.hh
rename : cpu/ozone/front_end_impl.hh => src/cpu/ozone/front_end_impl.hh
rename : cpu/ozone/inorder_back_end_impl.hh => src/cpu/ozone/inorder_back_end_impl.hh
rename : cpu/ozone/inst_queue_impl.hh => src/cpu/ozone/inst_queue_impl.hh
rename : cpu/ozone/lw_back_end.hh => src/cpu/ozone/lw_back_end.hh
rename : cpu/ozone/lw_back_end_impl.hh => src/cpu/ozone/lw_back_end_impl.hh
rename : cpu/ozone/lw_lsq.hh => src/cpu/ozone/lw_lsq.hh
rename : cpu/ozone/lw_lsq_impl.hh => src/cpu/ozone/lw_lsq_impl.hh
rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh
rename : cpu/ozone/thread_state.hh => src/cpu/ozone/thread_state.hh
rename : cpu/simple/cpu.cc => src/cpu/simple/base.cc
rename : cpu/cpu_exec_context.cc => src/cpu/simple_thread.cc
rename : cpu/thread_state.hh => src/cpu/thread_state.hh
rename : dev/ide_disk.hh => src/dev/ide_disk.hh
rename : python/m5/objects/BaseCPU.py => src/python/m5/objects/BaseCPU.py
rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/O3CPU.py
rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py
rename : python/m5/objects/Root.py => src/python/m5/objects/Root.py
rename : python/m5/objects/System.py => src/python/m5/objects/System.py
rename : sim/eventq.hh => src/sim/eventq.hh
rename : sim/pseudo_inst.cc => src/sim/pseudo_inst.cc
rename : sim/pseudo_inst.hh => src/sim/pseudo_inst.hh
rename : sim/serialize.cc => src/sim/serialize.cc
rename : sim/stat_control.cc => src/sim/stat_control.cc
rename : sim/stat_control.hh => src/sim/stat_control.hh
rename : sim/system.hh => src/sim/system.hh
extra : convert_revision : 135d90e43f6cea89f9460ba4e23f4b0b85886e7d
This commit is contained in:
Kevin Lim
2006-09-30 23:43:23 -04:00
87 changed files with 8363 additions and 515 deletions

View File

@@ -56,6 +56,7 @@ SimObjectParam<System *> system;
Param<int> cpu_id;
SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
Param<Tick> profile;
#else
SimObjectVectorParam<Process *> workload;
#endif // FULL_SYSTEM
@@ -68,6 +69,8 @@ Param<Counter> max_insts_any_thread;
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
Param<unsigned> cachePorts;
@@ -162,6 +165,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
INIT_PARAM(cpu_id, "processor ID"),
INIT_PARAM(itb, "Instruction translation buffer"),
INIT_PARAM(dtb, "Data translation buffer"),
INIT_PARAM(profile, ""),
#else
INIT_PARAM(workload, "Processes to run"),
#endif // FULL_SYSTEM
@@ -184,6 +188,10 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivO3CPU)
"Terminate when all threads have reached this load"
"count",
0),
INIT_PARAM_DFLT(stats_reset_inst,
"blah",
0),
INIT_PARAM_DFLT(progress_interval, "Progress interval", 0),
INIT_PARAM_DFLT(cachePorts, "Cache Ports", 200),
@@ -305,6 +313,7 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->cpu_id = cpu_id;
params->itb = itb;
params->dtb = dtb;
params->profile = profile;
#else
params->workload = workload;
#endif // FULL_SYSTEM
@@ -317,6 +326,8 @@ CREATE_SIM_OBJECT(DerivO3CPU)
params->max_insts_all_threads = max_insts_all_threads;
params->max_loads_any_thread = max_loads_any_thread;
params->max_loads_all_threads = max_loads_all_threads;
params->stats_reset_inst = stats_reset_inst;
params->progress_interval = progress_interval;
//
// Caches

View File

@@ -64,6 +64,8 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<Counter> max_insts_all_threads;
Param<Counter> max_loads_any_thread;
Param<Counter> max_loads_all_threads;
Param<Counter> stats_reset_inst;
Param<Tick> progress_interval;
#if FULL_SYSTEM
SimObjectParam<AlphaITB *> itb;
@@ -78,6 +80,7 @@ BEGIN_DECLARE_SIM_OBJECT_PARAMS(O3Checker)
Param<bool> defer_registration;
Param<bool> exitOnError;
Param<bool> updateOnError;
Param<bool> warnOnlyOnLoadError;
Param<bool> function_trace;
Param<Tick> function_trace_start;
@@ -94,6 +97,9 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
"terminate when any thread reaches this load count"),
INIT_PARAM(max_loads_all_threads,
"terminate when all threads have reached this load count"),
INIT_PARAM(stats_reset_inst,
"blah"),
INIT_PARAM_DFLT(progress_interval, "CPU Progress Interval", 0),
#if FULL_SYSTEM
INIT_PARAM(itb, "Instruction TLB"),
@@ -109,6 +115,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(O3Checker)
INIT_PARAM(defer_registration, "defer system registration (for sampling)"),
INIT_PARAM(exitOnError, "exit on error"),
INIT_PARAM(updateOnError, "Update the checker with the main CPU's state on error"),
INIT_PARAM_DFLT(warnOnlyOnLoadError, "warn, but don't exit, if a load "
"result errors", false),
INIT_PARAM(function_trace, "Enable function trace"),
@@ -126,7 +133,9 @@ CREATE_SIM_OBJECT(O3Checker)
params->max_insts_all_threads = 0;
params->max_loads_any_thread = 0;
params->max_loads_all_threads = 0;
params->stats_reset_inst = 0;
params->exitOnError = exitOnError;
params->updateOnError = updateOnError;
params->warnOnlyOnLoadError = warnOnlyOnLoadError;
params->deferRegistration = defer_registration;
params->functionTrace = function_trace;
@@ -139,6 +148,10 @@ CREATE_SIM_OBJECT(O3Checker)
temp = max_insts_all_threads;
temp = max_loads_any_thread;
temp = max_loads_all_threads;
temp = stats_reset_inst;
Tick temp2 = progress_interval;
params->progress_interval = 0;
temp2++;
#if FULL_SYSTEM
params->itb = itb;

View File

@@ -1083,12 +1083,26 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
// Generate trap squash event.
generateTrapEvent(tid);
// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC());
return false;
}
updateComInstStats(head_inst);
#if FULL_SYSTEM
if (thread[tid]->profile) {
// bool usermode =
// (cpu->readMiscReg(AlphaISA::IPR_DTB_CM, tid) & 0x18) != 0;
// thread[tid]->profilePC = usermode ? 1 : head_inst->readPC();
thread[tid]->profilePC = head_inst->readPC();
ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getXCProxy(),
head_inst->staticInst);
if (node)
thread[tid]->profileNode = node;
}
#endif
if (head_inst->traceData) {
head_inst->traceData->setFetchSeq(head_inst->seqNum);
head_inst->traceData->setCPSeq(thread[tid]->numInst);
@@ -1102,6 +1116,9 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
head_inst->renamedDestRegIdx(i));
}
if (head_inst->isCopy())
panic("Should not commit any copy instructions!");
// Finally clear the head ROB entry.
rob->retireHead(tid);

View File

@@ -33,6 +33,7 @@
#include "config/use_checker.hh"
#if FULL_SYSTEM
#include "cpu/quiesce_event.hh"
#include "sim/system.hh"
#else
#include "sim/process.hh"
@@ -793,6 +794,8 @@ template <class Impl>
unsigned int
FullO3CPU<Impl>::drain(Event *drain_event)
{
DPRINTF(O3CPU, "Switching out\n");
BaseCPU::switchOut(_sampler);
drainCount = 0;
fetch.drain();
decode.drain();
@@ -863,6 +866,7 @@ FullO3CPU<Impl>::switchOut()
{
fetch.switchOut();
rename.switchOut();
iew.switchOut();
commit.switchOut();
instList.clear();
while (!removeList.empty()) {
@@ -930,6 +934,45 @@ FullO3CPU<Impl>::takeOverFrom(BaseCPU *oldCPU)
tickEvent.schedule(curTick);
}
template <class Impl>
void
FullO3CPU<Impl>::serialize(std::ostream &os)
{
BaseCPU::serialize(os);
nameOut(os, csprintf("%s.tickEvent", name()));
tickEvent.serialize(os);
// Use SimpleThread's ability to checkpoint to make it easier to
// write out the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static CPUExecContext temp;
for (int i = 0; i < thread.size(); i++) {
nameOut(os, csprintf("%s.xc.%i", name(), i));
temp.copyXC(thread[i]->getXCProxy());
temp.serialize(os);
}
}
template <class Impl>
void
FullO3CPU<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
BaseCPU::unserialize(cp, section);
tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
// Use SimpleThread's ability to checkpoint to make it easier to
// read in the registers. Also make this static so it doesn't
// get instantiated multiple times (causes a panic in statistics).
static CPUExecContext temp;
for (int i = 0; i < thread.size(); i++) {
temp.copyXC(thread[i]->getXCProxy());
temp.unserialize(cp, csprintf("%s.xc.%i", section, i));
thread[i]->getXCProxy()->copyArchRegs(temp.getProxy());
}
}
template <class Impl>
uint64_t
FullO3CPU<Impl>::readIntReg(int reg_idx)

View File

@@ -442,6 +442,7 @@ DefaultFetch<Impl>::takeOverFrom()
wroteToTimeBuffer = false;
_status = Inactive;
switchedOut = false;
interruptPending = false;
branchPred.takeOverFrom();
}
@@ -563,7 +564,7 @@ DefaultFetch<Impl>::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, unsigned tid
unsigned flags = 0;
#endif // FULL_SYSTEM
if (cacheBlocked || (interruptPending && flags == 0)) {
if (cacheBlocked || isSwitchedOut() || (interruptPending && flags == 0)) {
// Hold off fetch from getting new instructions when:
// Cache is blocked, or
// while an interrupt is pending and we're not in PAL mode, or
@@ -1152,8 +1153,8 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetch_PC = next_PC;
if (instruction->isQuiesce()) {
warn("cycle %lli: Quiesce instruction encountered, halting fetch!",
curTick);
// warn("%lli: Quiesce instruction encountered, halting fetch!",
// curTick);
fetchStatus[tid] = QuiescePending;
++numInst;
status_change = true;
@@ -1268,7 +1269,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
fetchStatus[tid] = TrapPending;
status_change = true;
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
// warn("%lli fault (%d) detected @ PC %08p", curTick, fault, PC[tid]);
#else // !FULL_SYSTEM
warn("cycle %lli: fault (%s) detected @ PC %08p", curTick, fault->name(), PC[tid]);
#endif // FULL_SYSTEM

View File

@@ -216,6 +216,7 @@ class DefaultIEW
if (++wbOutstanding == wbMax)
ableToIssue = false;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding <= wbMax);
#ifdef DEBUG
wbList.insert(sn);
#endif
@@ -226,6 +227,7 @@ class DefaultIEW
if (wbOutstanding-- == wbMax)
ableToIssue = true;
DPRINTF(IEW, "wbOutstanding: %i\n", wbOutstanding);
assert(wbOutstanding >= 0);
#ifdef DEBUG
assert(wbList.find(sn) != wbList.end());
wbList.erase(sn);
@@ -450,7 +452,9 @@ class DefaultIEW
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
unsigned wbOutstanding;
/** Number of instructions in flight that will writeback. */
int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
@@ -507,6 +511,8 @@ class DefaultIEW
Stats::Scalar<> iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector<> iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
// Stats::Scalar<> iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar<> iewExecSquashedInsts;
/** Number of executed software prefetches. */

View File

@@ -162,17 +162,17 @@ DefaultIEW<Impl>::regStats()
branchMispredicts = predictedTakenIncorrect + predictedNotTakenIncorrect;
iewExecutedInsts
.name(name() + ".EXEC:insts")
.name(name() + ".iewExecutedInsts")
.desc("Number of executed instructions");
iewExecLoadInsts
.init(cpu->number_of_threads)
.name(name() + ".EXEC:loads")
.name(name() + ".iewExecLoadInsts")
.desc("Number of load instructions executed")
.flags(total);
iewExecSquashedInsts
.name(name() + ".EXEC:squashedInsts")
.name(name() + ".iewExecSquashedInsts")
.desc("Number of squashed instructions skipped in execute");
iewExecutedSwp
@@ -372,6 +372,8 @@ DefaultIEW<Impl>::switchOut()
{
// Clear any state.
switchedOut = true;
assert(insts[0].empty());
assert(skidBuffer[0].empty());
instQueue.switchOut();
ldstQueue.switchOut();
@@ -410,7 +412,6 @@ DefaultIEW<Impl>::takeOverFrom()
updateLSQNextCycle = false;
// @todo: Fix hardcoded number
for (int i = 0; i < issueToExecQueue.getSize(); ++i) {
issueToExecQueue.advance();
}
@@ -611,9 +612,11 @@ DefaultIEW<Impl>::instToCommit(DynInstPtr &inst)
wbNumInst = 0;
}
assert((wbCycle * wbWidth + wbNumInst) < wbMax);
assert((wbCycle * wbWidth + wbNumInst) <= wbMax);
}
DPRINTF(IEW, "Current wb cycle: %i, width: %i, numInst: %i\nwbActual:%i\n",
wbCycle, wbWidth, wbNumInst, wbCycle * wbWidth + wbNumInst);
// Add finished instruction to queue to commit.
(*iewQueue)[wbCycle].insts[wbNumInst] = inst;
(*iewQueue)[wbCycle].size++;
@@ -901,6 +904,22 @@ DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
}
}
template <class Impl>
void
DefaultIEW<Impl>::emptyRenameInsts(unsigned tid)
{
while (!insts[tid].empty()) {
if (insts[tid].front()->isLoad() ||
insts[tid].front()->isStore() ) {
toRename->iewInfo[tid].dispatchedToLSQ++;
}
toRename->iewInfo[tid].dispatched++;
insts[tid].pop();
}
}
template <class Impl>
void
DefaultIEW<Impl>::wakeCPU()
@@ -1273,13 +1292,23 @@ DefaultIEW<Impl>::executeInsts()
// event adds the instruction to the queue to commit
fault = ldstQueue.executeLoad(inst);
} else if (inst->isStore()) {
ldstQueue.executeStore(inst);
fault = ldstQueue.executeStore(inst);
// If the store had a fault then it may not have a mem req
if (inst->req && !(inst->req->getFlags() & LOCKED)) {
if (!inst->isStoreConditional() && fault == NoFault) {
inst->setExecuted();
instToCommit(inst);
} else if (fault != NoFault) {
// If the instruction faulted, then we need to send it along to commit
// without the instruction completing.
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
inst->setExecuted();
instToCommit(inst);
activityThisCycle();
}
// Store conditionals will mark themselves as
@@ -1404,7 +1433,7 @@ DefaultIEW<Impl>::writebackInsts()
// E.g. Uncached loads have not actually executed when they
// are first sent to commit. Instead commit must tell the LSQ
// when it's ready to execute the uncached load.
if (!inst->isSquashed() && inst->isExecuted()) {
if (!inst->isSquashed() && inst->isExecuted() && inst->getFault() == NoFault) {
int dependents = instQueue.wakeDependents(inst);
for (int i = 0; i < inst->numDestRegs(); i++) {

View File

@@ -479,13 +479,13 @@ class InstructionQueue
/** Distribution of number of instructions in the queue.
* @todo: Need to create struct to track the entry time for each
* instruction. */
Stats::VectorDistribution<> queueResDist;
// Stats::VectorDistribution<> queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution<> numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction.
* @todo: Need to create struct to track the ready time for each
* instruction. */
Stats::VectorDistribution<> issueDelayDist;
// Stats::VectorDistribution<> issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.

View File

@@ -230,7 +230,7 @@ InstructionQueue<Impl>::regStats()
.name(name() + ".iqSquashedNonSpecRemoved")
.desc("Number of squashed non-spec instructions that were removed")
.prereq(iqSquashedNonSpecRemoved);
/*
queueResDist
.init(Num_OpClasses, 0, 99, 2)
.name(name() + ".IQ:residence:")
@@ -240,6 +240,7 @@ InstructionQueue<Impl>::regStats()
for (int i = 0; i < Num_OpClasses; ++i) {
queueResDist.subname(i, opClassStrings[i]);
}
*/
numIssuedDist
.init(0,totalWidth,1)
.name(name() + ".ISSUE:issued_per_cycle")
@@ -268,7 +269,7 @@ InstructionQueue<Impl>::regStats()
//
// How long did instructions for a particular FU type wait prior to issue
//
/*
issueDelayDist
.init(Num_OpClasses,0,99,2)
.name(name() + ".ISSUE:")
@@ -281,7 +282,7 @@ InstructionQueue<Impl>::regStats()
subname << opClassStrings[i] << "_delay";
issueDelayDist.subname(i, subname.str());
}
*/
issueRate
.name(name() + ".ISSUE:rate")
.desc("Inst issue rate")
@@ -385,8 +386,16 @@ template <class Impl>
void
InstructionQueue<Impl>::switchOut()
{
/*
if (!instList[0].empty() || (numEntries != freeEntries) ||
!readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) {
dumpInsts();
// assert(0);
}
*/
resetState();
dependGraph.reset();
instsToExecute.clear();
switchedOut = true;
for (int i = 0; i < numThreads; ++i) {
memDepUnit[i].switchOut();
@@ -642,9 +651,12 @@ template <class Impl>
void
InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
{
DPRINTF(IQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
// The CPU could have been sleeping until this op completed (*extremely*
// long latency op). Wake it if it was. This may be overkill.
if (isSwitchedOut()) {
DPRINTF(IQ, "FU completion not processed, IQ is switched out [sn:%lli]\n",
inst->seqNum);
return;
}
@@ -1036,6 +1048,10 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
(squashed_inst->isMemRef() &&
!squashed_inst->memOpDone)) {
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
"squashed.\n",
tid, squashed_inst->seqNum, squashed_inst->readPC());
// Remove the instruction from the dependency list.
if (!squashed_inst->isNonSpeculative() &&
!squashed_inst->isStoreConditional() &&
@@ -1066,7 +1082,7 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
++iqSquashedOperandsExamined;
}
} else {
} else if (!squashed_inst->isStoreConditional() || !squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =
nonSpecInsts.find(squashed_inst->seqNum);
assert(ns_inst_it != nonSpecInsts.end());
@@ -1093,10 +1109,6 @@ InstructionQueue<Impl>::doSquash(unsigned tid)
count[squashed_inst->threadNumber]--;
++freeEntries;
DPRINTF(IQ, "[tid:%i]: Instruction [sn:%lli] PC %#x "
"squashed.\n",
tid, squashed_inst->seqNum, squashed_inst->readPC());
}
instList[tid].erase(squash_it--);

View File

@@ -165,6 +165,16 @@ LSQ<Impl>::regStats()
}
}
template<class Impl>
void
LSQ<Impl>::regStats()
{
//Initialize LSQs
for (int tid=0; tid < numThreads; tid++) {
thread[tid].regStats();
}
}
template<class Impl>
void
LSQ<Impl>::setActiveThreads(std::list<unsigned> *at_ptr)

View File

@@ -407,20 +407,9 @@ class LSQUnit {
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
/** Total number of loads forwaded from LSQ stores. */
Stats::Scalar<> lsqForwLoads;
/** Total number of loads ignored due to invalid addresses. */
Stats::Scalar<> invAddrLoads;
/** Total number of squashed loads. */
Stats::Scalar<> lsqSquashedLoads;
/** Total number of responses from the memory system that are
* ignored due to the instruction already being squashed. */
Stats::Scalar<> lsqIgnoredResponses;
/** Total number of squashed stores. */
Stats::Scalar<> lsqSquashedStores;

View File

@@ -180,6 +180,10 @@ LSQUnit<Impl>::regStats()
.name(name() + ".ignoredResponses")
.desc("Number of memory responses ignored because the instruction is squashed");
lsqMemOrderViolation
.name(name() + ".memOrderViolation")
.desc("Number of memory ordering violations");
lsqSquashedStores
.name(name() + ".squashedStores")
.desc("Number of stores squashed");
@@ -220,8 +224,10 @@ void
LSQUnit<Impl>::switchOut()
{
switchedOut = true;
for (int i = 0; i < loadQueue.size(); ++i)
for (int i = 0; i < loadQueue.size(); ++i) {
assert(!loadQueue[i]);
loadQueue[i] = NULL;
}
assert(storesToWB == 0);
}
@@ -408,6 +414,11 @@ LSQUnit<Impl>::executeLoad(DynInstPtr &inst)
if (load_fault != NoFault) {
// Send this instruction to commit, also make sure iew stage
// realizes there is activity.
// Mark it as executed unless it is an uncached load that
// needs to hit the head of commit.
if (!(inst->req->flags & UNCACHEABLE) || inst->isAtCommit()) {
inst->setExecuted();
}
iewStage->instToCommit(inst);
iewStage->activityThisCycle();
}
@@ -467,6 +478,7 @@ LSQUnit<Impl>::executeStore(DynInstPtr &store_inst)
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = loadQueue[load_idx];
++lsqMemOrderViolation;
return genMachineCheckFault();
}

View File

@@ -109,6 +109,9 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
assert(instList[0].empty());
assert(instsToReplay.empty());
assert(memDepHash.empty());
// Clear any state.
for (int i = 0; i < Impl::MaxThreads; ++i) {
instList[i].clear();

View File

@@ -417,6 +417,8 @@ class DefaultRename
/** The maximum skid buffer size. */
unsigned skidBufferMax;
PhysRegIndex maxPhysicalRegs;
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is priortized in that order.
*/

View File

@@ -41,7 +41,8 @@ DefaultRename<Impl>::DefaultRename(Params *params)
commitToRenameDelay(params->commitToRenameDelay),
renameWidth(params->renameWidth),
commitWidth(params->commitWidth),
numThreads(params->numberOfThreads)
numThreads(params->numberOfThreads),
maxPhysicalRegs(params->numPhysIntRegs + params->numPhysFloatRegs)
{
_status = Inactive;
@@ -286,6 +287,11 @@ DefaultRename<Impl>::switchOut()
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
// Be sure to mark its register as ready if it's a misc register.
if (hb_it->newPhysReg >= maxPhysicalRegs) {
scoreboard->setReg(hb_it->newPhysReg);
}
historyBuffer[i].erase(hb_it++);
}
insts[i].clear();
@@ -889,6 +895,11 @@ DefaultRename<Impl>::doSquash(const InstSeqNum &squashed_seq_num, unsigned tid)
// Put the renamed physical register back on the free list.
freeList->addReg(hb_it->newPhysReg);
// Be sure to mark its register as ready if it's a misc register.
if (hb_it->newPhysReg >= maxPhysicalRegs) {
scoreboard->setReg(hb_it->newPhysReg);
}
historyBuffer[tid].erase(hb_it++);
++renameUndoneMaps;

View File

@@ -31,8 +31,11 @@
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
#include "base/callback.hh"
#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/sim_exit.hh"
class Event;
class Process;
@@ -75,8 +78,22 @@ struct O3ThreadState : public ThreadState {
#if FULL_SYSTEM
O3ThreadState(O3CPU *_cpu, int _thread_num)
: ThreadState(-1, _thread_num),
inSyscall(0), trapPending(0)
{ }
cpu(_cpu), inSyscall(0), trapPending(0)
{
if (cpu->params->profile) {
profile = new FunctionProfile(cpu->params->system->kernelSymtab);
Callback *cb =
new MakeCallback<O3ThreadState,
&O3ThreadState::dumpFuncProfile>(this);
registerExitCallback(cb);
}
// let's fill with a dummy node for now so we don't get a segfault
// on the first cycle when there's no node available.
static ProfileNode dummyNode;
profileNode = &dummyNode;
profilePC = 3;
}
#else
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process, int _asid,
MemObject *mem)
@@ -95,6 +112,14 @@ struct O3ThreadState : public ThreadState {
/** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
#endif
#if FULL_SYSTEM
void dumpFuncProfile()
{
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
profile->dump(xcProxy, *os);
}
#endif
};
#endif // __CPU_O3_THREAD_STATE_HH__

View File

@@ -62,6 +62,8 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
localPredictorMask = floorPow2(localPredictorSize) - 1;
if (!isPowerOf2(localHistoryTableSize)) {
fatal("Invalid local history table size!\n");
}
@@ -158,7 +160,7 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
& localPredictorMask;
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
@@ -176,7 +178,8 @@ TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
bp_history = (void *)history;
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Commented code is for doing speculative update of counters and
// all histories.
@@ -234,7 +237,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
// Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_hist = localHistoryTable[local_history_idx];
local_predictor_idx = local_predictor_hist & localHistoryMask;
local_predictor_idx = local_predictor_hist & localPredictorMask;
// Update the choice predictor to tell it which one was correct if
// there was a prediction.
@@ -256,6 +259,7 @@ TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
}
assert(globalHistory < globalPredictorSize &&
local_history_idx < localHistoryTableSize &&
local_predictor_idx < localPredictorSize);
// Update the counters and local history with the proper

View File

@@ -159,6 +159,9 @@ class TournamentBP
/** Size of the local predictor. */
unsigned localPredictorSize;
/** Mask to get the proper index bits into the predictor. */
unsigned localPredictorMask;
/** Number of bits of the local predictor's counters. */
unsigned localCtrBits;