cpu: Create an O3 namespace and simplify O3 names.

DefaultFoo => Foo
O3Foo => Foo
FullO3CPU => CPU

DerivO3CPU => O3CPU (python)

DerivO3 => o3::CPU

Change-Id: I04551214442633c79c33e9d86b067ff3ec0d1a8d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42120
Maintainer: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Nathanael Premillieu <nathanael.premillieu@huawei.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2021-03-03 08:09:16 -08:00
parent c76fa4d39e
commit 9909ea8a40
56 changed files with 1322 additions and 1140 deletions

View File

@@ -43,6 +43,7 @@ from m5.objects.FuncUnitConfig import *
class FUPool(SimObject):
type = 'FUPool'
cxx_class = 'o3::FUPool'
cxx_header = "cpu/o3/fu_pool.hh"
FUList = VectorParam.FUDesc("list of FU's for this pool")

View File

@@ -54,9 +54,10 @@ class SMTQueuePolicy(ScopedEnum):
class CommitPolicy(ScopedEnum):
vals = [ 'Aggressive', 'RoundRobin', 'OldestReady' ]
class DerivO3CPU(BaseCPU):
type = 'DerivO3CPU'
cxx_header = 'cpu/o3/deriv.hh'
class O3CPU(BaseCPU):
type = 'O3CPU'
cxx_class = 'o3::CPU'
cxx_header = 'cpu/o3/dyn_inst.hh'
@classmethod
def memory_mode(cls):
@@ -191,3 +192,6 @@ class DerivO3CPU(BaseCPU):
else:
print("ERROR: Checker only supported under ARM ISA!")
exit(1)
# Deprecated
DerivO3CPU = O3CPU

View File

@@ -29,4 +29,5 @@ from m5.objects.CheckerCPU import CheckerCPU
class O3Checker(CheckerCPU):
type = 'O3Checker'
cxx_class = 'o3::Checker'
cxx_header = 'cpu/o3/checker.hh'

View File

@@ -43,4 +43,4 @@
#include "cpu/checker/cpu_impl.hh"
template
class Checker<O3DynInstPtr>;
class Checker<o3::DynInstPtr>;

View File

@@ -44,13 +44,16 @@
#include "cpu/checker/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
namespace o3
{
/**
* Specific non-templated derived class used for SimObject configuration.
*/
class O3Checker : public Checker<O3DynInstPtr>
class Checker : public ::Checker<DynInstPtr>
{
public:
O3Checker(const Params &p) : Checker<O3DynInstPtr>(p)
Checker(const Params &p) : ::Checker<DynInstPtr>(p)
{
// The checker should check all instructions executed by the main
// cpu and therefore any parameters for early exit don't make much
@@ -60,4 +63,6 @@ class O3Checker : public Checker<O3DynInstPtr>
}
};
} // namespace o3
#endif // __CPU_O3_CHECKER_HH__

View File

@@ -51,7 +51,7 @@
#include "cpu/o3/limits.hh"
#include "sim/faults.hh"
namespace O3Comm
namespace o3
{
/** Struct that defines the information passed from fetch to decode. */
@@ -59,7 +59,7 @@ struct FetchStruct
{
int size;
O3DynInstPtr insts[O3MaxWidth];
DynInstPtr insts[MaxWidth];
Fault fetchFault;
InstSeqNum fetchFaultSN;
bool clearFetchFault;
@@ -70,7 +70,7 @@ struct DecodeStruct
{
int size;
O3DynInstPtr insts[O3MaxWidth];
DynInstPtr insts[MaxWidth];
};
/** Struct that defines the information passed from rename to IEW. */
@@ -78,7 +78,7 @@ struct RenameStruct
{
int size;
O3DynInstPtr insts[O3MaxWidth];
DynInstPtr insts[MaxWidth];
};
/** Struct that defines the information passed from IEW to commit. */
@@ -86,23 +86,23 @@ struct IEWStruct
{
int size;
O3DynInstPtr insts[O3MaxWidth];
O3DynInstPtr mispredictInst[O3MaxThreads];
Addr mispredPC[O3MaxThreads];
InstSeqNum squashedSeqNum[O3MaxThreads];
TheISA::PCState pc[O3MaxThreads];
DynInstPtr insts[MaxWidth];
DynInstPtr mispredictInst[MaxThreads];
Addr mispredPC[MaxThreads];
InstSeqNum squashedSeqNum[MaxThreads];
TheISA::PCState pc[MaxThreads];
bool squash[O3MaxThreads];
bool branchMispredict[O3MaxThreads];
bool branchTaken[O3MaxThreads];
bool includeSquashInst[O3MaxThreads];
bool squash[MaxThreads];
bool branchMispredict[MaxThreads];
bool branchTaken[MaxThreads];
bool includeSquashInst[MaxThreads];
};
struct IssueStruct
{
int size;
O3DynInstPtr insts[O3MaxWidth];
DynInstPtr insts[MaxWidth];
};
/** Struct that defines all backwards communication. */
@@ -111,8 +111,8 @@ struct TimeStruct
struct DecodeComm
{
TheISA::PCState nextPC;
O3DynInstPtr mispredictInst;
O3DynInstPtr squashInst;
DynInstPtr mispredictInst;
DynInstPtr squashInst;
InstSeqNum doneSeqNum;
Addr mispredPC;
uint64_t branchAddr;
@@ -123,11 +123,11 @@ struct TimeStruct
bool branchTaken;
};
DecodeComm decodeInfo[O3MaxThreads];
DecodeComm decodeInfo[MaxThreads];
struct RenameComm {};
RenameComm renameInfo[O3MaxThreads];
RenameComm renameInfo[MaxThreads];
struct IewComm
{
@@ -146,7 +146,7 @@ struct TimeStruct
bool usedLSQ;
};
IewComm iewInfo[O3MaxThreads];
IewComm iewInfo[MaxThreads];
struct CommitComm
{
@@ -169,14 +169,14 @@ struct TimeStruct
/// Provide fetch the instruction that mispredicted, if this
/// pointer is not-null a misprediction occurred
O3DynInstPtr mispredictInst; // *F
DynInstPtr mispredictInst; // *F
/// Instruction that caused a non-mispredict squash
O3DynInstPtr squashInst; // *F
DynInstPtr squashInst; // *F
/// Hack for now to send back a strictly ordered access to the
/// IEW stage.
O3DynInstPtr strictlyOrderedLoad; // *I
DynInstPtr strictlyOrderedLoad; // *I
/// Communication specifically to the IQ to tell the IQ that it can
/// schedule a non-speculative instruction.
@@ -212,16 +212,16 @@ struct TimeStruct
};
CommitComm commitInfo[O3MaxThreads];
CommitComm commitInfo[MaxThreads];
bool decodeBlock[O3MaxThreads];
bool decodeUnblock[O3MaxThreads];
bool renameBlock[O3MaxThreads];
bool renameUnblock[O3MaxThreads];
bool iewBlock[O3MaxThreads];
bool iewUnblock[O3MaxThreads];
bool decodeBlock[MaxThreads];
bool decodeUnblock[MaxThreads];
bool renameBlock[MaxThreads];
bool renameUnblock[MaxThreads];
bool iewBlock[MaxThreads];
bool iewUnblock[MaxThreads];
};
} // namespace O3Comm
} // namespace o3
#endif //__CPU_O3_COMM_HH__

View File

@@ -64,19 +64,22 @@
#include "debug/ExecFaulting.hh"
#include "debug/HtmCpu.hh"
#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
#include "sim/faults.hh"
#include "sim/full_system.hh"
namespace o3
{
void
DefaultCommit::processTrapEvent(ThreadID tid)
Commit::processTrapEvent(ThreadID tid)
{
// This will get reset by commit if it was switched out at the
// time of this event processing.
trapSquash[tid] = true;
}
DefaultCommit::DefaultCommit(FullO3CPU *_cpu, const DerivO3CPUParams &params)
Commit::Commit(CPU *_cpu, const O3CPUParams &params)
: commitPolicy(params.smtCommitPolicy),
cpu(_cpu),
iewToCommitDelay(params.iewToCommitDelay),
@@ -93,10 +96,10 @@ DefaultCommit::DefaultCommit(FullO3CPU *_cpu, const DerivO3CPUParams &params)
avoidQuiesceLiveLock(false),
stats(_cpu, this)
{
if (commitWidth > O3MaxWidth)
if (commitWidth > MaxWidth)
fatal("commitWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
commitWidth, static_cast<int>(O3MaxWidth));
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
commitWidth, static_cast<int>(MaxWidth));
_status = Active;
_nextStatus = Inactive;
@@ -108,7 +111,7 @@ DefaultCommit::DefaultCommit(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
}
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
commitStatus[tid] = Idle;
changedROBNumEntries[tid] = false;
trapSquash[tid] = false;
@@ -127,24 +130,20 @@ DefaultCommit::DefaultCommit(FullO3CPU *_cpu, const DerivO3CPUParams &params)
interrupt = NoFault;
}
std::string
DefaultCommit::name() const
{
return cpu->name() + ".commit";
}
std::string Commit::name() const { return cpu->name() + ".commit"; }
void
DefaultCommit::regProbePoints()
Commit::regProbePoints()
{
ppCommit = new ProbePointArg<O3DynInstPtr>(
ppCommit = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Commit");
ppCommitStall = new ProbePointArg<O3DynInstPtr>(
ppCommitStall = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "CommitStall");
ppSquash = new ProbePointArg<O3DynInstPtr>(
ppSquash = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Squash");
}
DefaultCommit::CommitStats::CommitStats(FullO3CPU *cpu, DefaultCommit *commit)
Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
: Stats::Group(cpu, "commit"),
ADD_STAT(commitSquashedInsts, Stats::Units::Count::get(),
"The number of squashed insts skipped by commit"),
@@ -243,13 +242,13 @@ DefaultCommit::CommitStats::CommitStats(FullO3CPU *cpu, DefaultCommit *commit)
}
void
DefaultCommit::setThreads(std::vector<O3ThreadState *> &threads)
Commit::setThreads(std::vector<ThreadState *> &threads)
{
thread = threads;
}
void
DefaultCommit::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
Commit::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
@@ -261,7 +260,7 @@ DefaultCommit::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
}
void
DefaultCommit::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
Commit::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
fetchQueue = fq_ptr;
@@ -270,7 +269,7 @@ DefaultCommit::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
}
void
DefaultCommit::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
Commit::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
renameQueue = rq_ptr;
@@ -279,7 +278,7 @@ DefaultCommit::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
}
void
DefaultCommit::setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr)
Commit::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
iewQueue = iq_ptr;
@@ -288,32 +287,28 @@ DefaultCommit::setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr)
}
void
DefaultCommit::setIEWStage(DefaultIEW *iew_stage)
Commit::setIEWStage(IEW *iew_stage)
{
iewStage = iew_stage;
}
void
DefaultCommit::setActiveThreads(std::list<ThreadID> *at_ptr)
Commit::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
void
DefaultCommit::setRenameMap(UnifiedRenameMap rm_ptr[])
Commit::setRenameMap(UnifiedRenameMap rm_ptr[])
{
for (ThreadID tid = 0; tid < numThreads; tid++)
renameMap[tid] = &rm_ptr[tid];
}
void
DefaultCommit::setROB(ROB *rob_ptr)
{
rob = rob_ptr;
}
void Commit::setROB(ROB *rob_ptr) { rob = rob_ptr; }
void
DefaultCommit::startupStage()
Commit::startupStage()
{
rob->setActiveThreads(activeThreads);
rob->resetEntries();
@@ -327,13 +322,13 @@ DefaultCommit::startupStage()
// Commit must broadcast the number of free entries it has at the
// start of the simulation, so it starts as active.
cpu->activateStage(FullO3CPU::CommitIdx);
cpu->activateStage(CPU::CommitIdx);
cpu->activityThisCycle();
}
void
DefaultCommit::clearStates(ThreadID tid)
Commit::clearStates(ThreadID tid)
{
commitStatus[tid] = Idle;
changedROBNumEntries[tid] = false;
@@ -347,21 +342,17 @@ DefaultCommit::clearStates(ThreadID tid)
squashAfterInst[tid] = NULL;
}
void
DefaultCommit::drain()
{
drainPending = true;
}
void Commit::drain() { drainPending = true; }
void
DefaultCommit::drainResume()
Commit::drainResume()
{
drainPending = false;
drainImminent = false;
}
void
DefaultCommit::drainSanityCheck() const
Commit::drainSanityCheck() const
{
assert(isDrained());
rob->drainSanityCheck();
@@ -376,7 +367,7 @@ DefaultCommit::drainSanityCheck() const
}
bool
DefaultCommit::isDrained() const
Commit::isDrained() const
{
/* Make sure no one is executing microcode. There are two reasons
* for this:
@@ -403,7 +394,7 @@ DefaultCommit::isDrained() const
}
void
DefaultCommit::takeOverFrom()
Commit::takeOverFrom()
{
_status = Active;
_nextStatus = Inactive;
@@ -418,7 +409,7 @@ DefaultCommit::takeOverFrom()
}
void
DefaultCommit::deactivateThread(ThreadID tid)
Commit::deactivateThread(ThreadID tid)
{
std::list<ThreadID>::iterator thread_it = std::find(priority_list.begin(),
priority_list.end(), tid);
@@ -429,7 +420,7 @@ DefaultCommit::deactivateThread(ThreadID tid)
}
bool
DefaultCommit::executingHtmTransaction(ThreadID tid) const
Commit::executingHtmTransaction(ThreadID tid) const
{
if (tid == InvalidThreadID)
return false;
@@ -438,7 +429,7 @@ DefaultCommit::executingHtmTransaction(ThreadID tid) const
}
void
DefaultCommit::resetHtmStartsStops(ThreadID tid)
Commit::resetHtmStartsStops(ThreadID tid)
{
if (tid != InvalidThreadID)
{
@@ -449,7 +440,7 @@ DefaultCommit::resetHtmStartsStops(ThreadID tid)
void
DefaultCommit::updateStatus()
Commit::updateStatus()
{
// reset ROB changed variable
std::list<ThreadID>::iterator threads = activeThreads->begin();
@@ -469,17 +460,17 @@ DefaultCommit::updateStatus()
if (_nextStatus == Inactive && _status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::CommitIdx);
cpu->deactivateStage(CPU::CommitIdx);
} else if (_nextStatus == Active && _status == Inactive) {
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(FullO3CPU::CommitIdx);
cpu->activateStage(CPU::CommitIdx);
}
_status = _nextStatus;
}
bool
DefaultCommit::changedROBEntries()
Commit::changedROBEntries()
{
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
@@ -496,13 +487,13 @@ DefaultCommit::changedROBEntries()
}
size_t
DefaultCommit::numROBFreeEntries(ThreadID tid)
Commit::numROBFreeEntries(ThreadID tid)
{
return rob->numFreeEntries(tid);
}
void
DefaultCommit::generateTrapEvent(ThreadID tid, Fault inst_fault)
Commit::generateTrapEvent(ThreadID tid, Fault inst_fault)
{
DPRINTF(Commit, "Generating trap event for [tid:%i]\n", tid);
@@ -527,7 +518,7 @@ DefaultCommit::generateTrapEvent(ThreadID tid, Fault inst_fault)
}
void
DefaultCommit::generateTCEvent(ThreadID tid)
Commit::generateTCEvent(ThreadID tid)
{
assert(!trapInFlight[tid]);
DPRINTF(Commit, "Generating TC squash event for [tid:%i]\n", tid);
@@ -536,7 +527,7 @@ DefaultCommit::generateTCEvent(ThreadID tid)
}
void
DefaultCommit::squashAll(ThreadID tid)
Commit::squashAll(ThreadID tid)
{
// If we want to include the squashing instruction in the squash,
// then use one older sequence number.
@@ -571,7 +562,7 @@ DefaultCommit::squashAll(ThreadID tid)
}
void
DefaultCommit::squashFromTrap(ThreadID tid)
Commit::squashFromTrap(ThreadID tid)
{
squashAll(tid);
@@ -588,7 +579,7 @@ DefaultCommit::squashFromTrap(ThreadID tid)
}
void
DefaultCommit::squashFromTC(ThreadID tid)
Commit::squashFromTC(ThreadID tid)
{
squashAll(tid);
@@ -604,7 +595,7 @@ DefaultCommit::squashFromTC(ThreadID tid)
}
void
DefaultCommit::squashFromSquashAfter(ThreadID tid)
Commit::squashFromSquashAfter(ThreadID tid)
{
DPRINTF(Commit, "Squashing after squash after request, "
"restarting at PC %s\n", pc[tid]);
@@ -621,7 +612,7 @@ DefaultCommit::squashFromSquashAfter(ThreadID tid)
}
void
DefaultCommit::squashAfter(ThreadID tid, const O3DynInstPtr &head_inst)
Commit::squashAfter(ThreadID tid, const DynInstPtr &head_inst)
{
DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%llu]\n",
tid, head_inst->seqNum);
@@ -632,7 +623,7 @@ DefaultCommit::squashAfter(ThreadID tid, const O3DynInstPtr &head_inst)
}
void
DefaultCommit::tick()
Commit::tick()
{
wroteToTimeBuffer = false;
_nextStatus = Inactive;
@@ -680,14 +671,14 @@ DefaultCommit::tick()
// will be active.
_nextStatus = Active;
GEM5_VAR_USED const O3DynInstPtr &inst = rob->readHeadInst(tid);
GEM5_VAR_USED const DynInstPtr &inst = rob->readHeadInst(tid);
DPRINTF(Commit,"[tid:%i] Instruction [sn:%llu] PC %s is head of"
" ROB and ready to commit\n",
tid, inst->seqNum, inst->pcState());
} else if (!rob->isEmpty(tid)) {
const O3DynInstPtr &inst = rob->readHeadInst(tid);
const DynInstPtr &inst = rob->readHeadInst(tid);
ppCommitStall->notify(inst);
@@ -710,7 +701,7 @@ DefaultCommit::tick()
}
void
DefaultCommit::handleInterrupt()
Commit::handleInterrupt()
{
// Verify that we still have an interrupt to handle
if (!cpu->checkInterrupts(0)) {
@@ -763,7 +754,7 @@ DefaultCommit::handleInterrupt()
}
void
DefaultCommit::propagateInterrupt()
Commit::propagateInterrupt()
{
// Don't propagate interrupts if we are currently handling a trap or
// in draining and the last observable instruction has been committed.
@@ -787,7 +778,7 @@ DefaultCommit::propagateInterrupt()
}
void
DefaultCommit::commit()
Commit::commit()
{
if (FullSystem) {
// Check if we have an interrupt and get ready to handle it
@@ -949,7 +940,7 @@ DefaultCommit::commit()
}
void
DefaultCommit::commitInsts()
Commit::commitInsts()
{
////////////////////////////////////
// Handle commit
@@ -964,7 +955,7 @@ DefaultCommit::commitInsts()
unsigned num_committed = 0;
O3DynInstPtr head_inst;
DynInstPtr head_inst;
// Commit as many instructions as possible until the commit bandwidth
// limit is reached, or it becomes impossible to commit any more.
@@ -1155,7 +1146,7 @@ DefaultCommit::commitInsts()
}
bool
DefaultCommit::commitHead(const O3DynInstPtr &head_inst, unsigned inst_num)
Commit::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
{
assert(head_inst);
@@ -1345,7 +1336,7 @@ DefaultCommit::commitHead(const O3DynInstPtr &head_inst, unsigned inst_num)
}
void
DefaultCommit::getInsts()
Commit::getInsts()
{
DPRINTF(Commit, "Getting instructions from Rename stage.\n");
@@ -1353,7 +1344,7 @@ DefaultCommit::getInsts()
int insts_to_process = std::min((int)renameWidth, fromRename->size);
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
const O3DynInstPtr &inst = fromRename->insts[inst_num];
const DynInstPtr &inst = fromRename->insts[inst_num];
ThreadID tid = inst->threadNumber;
if (!inst->isSquashed() &&
@@ -1378,7 +1369,7 @@ DefaultCommit::getInsts()
}
void
DefaultCommit::markCompletedInsts()
Commit::markCompletedInsts()
{
// Grab completed insts out of the IEW instruction queue, and mark
// instructions completed within the ROB.
@@ -1398,7 +1389,7 @@ DefaultCommit::markCompletedInsts()
}
void
DefaultCommit::updateComInstStats(const O3DynInstPtr &inst)
Commit::updateComInstStats(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -1460,7 +1451,7 @@ DefaultCommit::updateComInstStats(const O3DynInstPtr &inst)
// //
////////////////////////////////////////
ThreadID
DefaultCommit::getCommittingThread()
Commit::getCommittingThread()
{
if (numThreads > 1) {
switch (commitPolicy) {
@@ -1495,7 +1486,7 @@ DefaultCommit::getCommittingThread()
}
ThreadID
DefaultCommit::roundRobin()
Commit::roundRobin()
{
std::list<ThreadID>::iterator pri_iter = priority_list.begin();
std::list<ThreadID>::iterator end = priority_list.end();
@@ -1522,7 +1513,7 @@ DefaultCommit::roundRobin()
}
ThreadID
DefaultCommit::oldestReady()
Commit::oldestReady()
{
unsigned oldest = 0;
bool first = true;
@@ -1540,7 +1531,7 @@ DefaultCommit::oldestReady()
if (rob->isHeadReady(tid)) {
const O3DynInstPtr &head_inst = rob->readHeadInst(tid);
const DynInstPtr &head_inst = rob->readHeadInst(tid);
if (first) {
oldest = tid;
@@ -1558,3 +1549,5 @@ DefaultCommit::oldestReady()
return InvalidThreadID;
}
}
} // namespace o3

View File

@@ -56,12 +56,15 @@
#include "enums/CommitPolicy.hh"
#include "sim/probe/probe.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
class O3ThreadState;
namespace o3
{
class ThreadState;
/**
* DefaultCommit handles single threaded and SMT commit. Its width is
* Commit handles single threaded and SMT commit. Its width is
* specified by the parameters; each cycle it tries to commit that
* many instructions. The SMT policy decides which thread it tries to
* commit instructions from. Non-speculative instructions must reach
@@ -82,7 +85,7 @@ class O3ThreadState;
* supports multiple cycle squashing, to model a ROB that can only
* remove a certain number of instructions per cycle.
*/
class DefaultCommit
class Commit
{
public:
/** Overall commit status. Used to determine if the CPU can deschedule
@@ -111,57 +114,57 @@ class DefaultCommit
/** Next commit status, to be set at the end of the cycle. */
CommitStatus _nextStatus;
/** Per-thread status. */
ThreadStatus commitStatus[O3MaxThreads];
ThreadStatus commitStatus[MaxThreads];
/** Commit policy used in SMT mode. */
CommitPolicy commitPolicy;
/** Probe Points. */
ProbePointArg<O3DynInstPtr> *ppCommit;
ProbePointArg<O3DynInstPtr> *ppCommitStall;
ProbePointArg<DynInstPtr> *ppCommit;
ProbePointArg<DynInstPtr> *ppCommitStall;
/** To probe when an instruction is squashed */
ProbePointArg<O3DynInstPtr> *ppSquash;
ProbePointArg<DynInstPtr> *ppSquash;
/** Mark the thread as processing a trap. */
void processTrapEvent(ThreadID tid);
public:
/** Construct a DefaultCommit with the given parameters. */
DefaultCommit(FullO3CPU *_cpu, const DerivO3CPUParams &params);
/** Construct a Commit with the given parameters. */
Commit(CPU *_cpu, const O3CPUParams &params);
/** Returns the name of the DefaultCommit. */
/** Returns the name of the Commit. */
std::string name() const;
/** Registers probes. */
void regProbePoints();
/** Sets the list of threads. */
void setThreads(std::vector<O3ThreadState *> &threads);
void setThreads(std::vector<ThreadState *> &threads);
/** Sets the main time buffer pointer, used for backwards communication. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
void setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr);
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Sets the pointer to the queue coming from rename. */
void setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr);
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr);
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
/** Sets the pointer to the IEW stage. */
void setIEWStage(DefaultIEW *iew_stage);
void setIEWStage(IEW *iew_stage);
/** The pointer to the IEW stage. Used solely to ensure that
* various events (traps, interrupts, syscalls) do not occur until
* all stores have written back.
*/
DefaultIEW *iewStage;
IEW *iewStage;
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to the committed state rename map. */
void setRenameMap(UnifiedRenameMap rm_ptr[O3MaxThreads]);
void setRenameMap(UnifiedRenameMap rm_ptr[MaxThreads]);
/** Sets pointer to the ROB. */
void setROB(ROB *rob_ptr);
@@ -267,7 +270,7 @@ class DefaultCommit
* @param tid ID of the thread to squash.
* @param head_inst Instruction that requested the squash.
*/
void squashAfter(ThreadID tid, const O3DynInstPtr &head_inst);
void squashAfter(ThreadID tid, const DynInstPtr &head_inst);
/** Handles processing an interrupt. */
void handleInterrupt();
@@ -281,7 +284,7 @@ class DefaultCommit
/** Tries to commit the head ROB instruction passed in.
* @param head_inst The instruction to be committed.
*/
bool commitHead(const O3DynInstPtr &head_inst, unsigned inst_num);
bool commitHead(const DynInstPtr &head_inst, unsigned inst_num);
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
@@ -317,29 +320,29 @@ class DefaultCommit
private:
/** Time buffer interface. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to write information heading to previous stages. */
TimeBuffer<O3Comm::TimeStruct>::wire toIEW;
TimeBuffer<TimeStruct>::wire toIEW;
/** Wire to read information from IEW (for ROB). */
TimeBuffer<O3Comm::TimeStruct>::wire robInfoFromIEW;
TimeBuffer<TimeStruct>::wire robInfoFromIEW;
TimeBuffer<O3Comm::FetchStruct> *fetchQueue;
TimeBuffer<FetchStruct> *fetchQueue;
TimeBuffer<O3Comm::FetchStruct>::wire fromFetch;
TimeBuffer<FetchStruct>::wire fromFetch;
/** IEW instruction queue interface. */
TimeBuffer<O3Comm::IEWStruct> *iewQueue;
TimeBuffer<IEWStruct> *iewQueue;
/** Wire to read information from IEW queue. */
TimeBuffer<O3Comm::IEWStruct>::wire fromIEW;
TimeBuffer<IEWStruct>::wire fromIEW;
/** Rename instruction queue interface, for ROB. */
TimeBuffer<O3Comm::RenameStruct> *renameQueue;
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to read information from rename queue. */
TimeBuffer<O3Comm::RenameStruct>::wire fromRename;
TimeBuffer<RenameStruct>::wire fromRename;
public:
/** ROB interface. */
@@ -347,10 +350,10 @@ class DefaultCommit
private:
/** Pointer to O3CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Vector of all of the threads. */
std::vector<O3ThreadState *> thread;
std::vector<ThreadState *> thread;
/** Records that commit has written to the time buffer this cycle. Used for
* the CPU to determine if it can deschedule itself if there is no activity.
@@ -360,13 +363,13 @@ class DefaultCommit
/** Records if the number of ROB entries has changed this cycle. If it has,
* then the number of free entries must be re-broadcast.
*/
bool changedROBNumEntries[O3MaxThreads];
bool changedROBNumEntries[MaxThreads];
/** Records if a thread has to squash this cycle due to a trap. */
bool trapSquash[O3MaxThreads];
bool trapSquash[MaxThreads];
/** Records if a thread has to squash this cycle due to an XC write. */
bool tcSquash[O3MaxThreads];
bool tcSquash[MaxThreads];
/**
* Instruction passed to squashAfter().
@@ -375,7 +378,7 @@ class DefaultCommit
* that caused a squash since this needs to be passed to the fetch
* stage once squashing starts.
*/
O3DynInstPtr squashAfterInst[O3MaxThreads];
DynInstPtr squashAfterInst[MaxThreads];
/** Priority List used for Commit Policy */
std::list<ThreadID> priority_list;
@@ -425,29 +428,29 @@ class DefaultCommit
/** The commit PC state of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
TheISA::PCState pc[O3MaxThreads];
TheISA::PCState pc[MaxThreads];
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[O3MaxThreads];
InstSeqNum youngestSeqNum[MaxThreads];
/** The sequence number of the last committed instruction. */
InstSeqNum lastCommitedSeqNum[O3MaxThreads];
InstSeqNum lastCommitedSeqNum[MaxThreads];
/** Records if there is a trap currently in flight. */
bool trapInFlight[O3MaxThreads];
bool trapInFlight[MaxThreads];
/** Records if there were any stores committed this cycle. */
bool committedStores[O3MaxThreads];
bool committedStores[MaxThreads];
/** Records if commit should check if the ROB is truly empty (see
commit_impl.hh). */
bool checkEmptyROB[O3MaxThreads];
bool checkEmptyROB[MaxThreads];
/** Pointer to the list of active threads. */
std::list<ThreadID> *activeThreads;
/** Rename map interface. */
UnifiedRenameMap *renameMap[O3MaxThreads];
UnifiedRenameMap *renameMap[MaxThreads];
/** True if last committed microop can be followed by an interrupt */
bool canHandleInterrupts;
@@ -459,15 +462,15 @@ class DefaultCommit
bool avoidQuiesceLiveLock;
/** Updates commit stats based on this instruction. */
void updateComInstStats(const O3DynInstPtr &inst);
void updateComInstStats(const DynInstPtr &inst);
// HTM
int htmStarts[O3MaxThreads];
int htmStops[O3MaxThreads];
int htmStarts[MaxThreads];
int htmStops[MaxThreads];
struct CommitStats : public Stats::Group
{
CommitStats(FullO3CPU *cpu, DefaultCommit *commit);
CommitStats(CPU *cpu, Commit *commit);
/** Stat for the total number of squashed instructions discarded by
* commit.
*/
@@ -513,4 +516,6 @@ class DefaultCommit
} stats;
};
} // namespace o3
#endif // __CPU_O3_COMMIT_HH__

View File

@@ -64,12 +64,15 @@
struct BaseCPUParams;
FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
namespace o3
{
CPU::CPU(const O3CPUParams &params)
: BaseCPU(params),
mmu(params.mmu),
tickEvent([this]{ tick(); }, "FullO3CPU tick",
tickEvent([this]{ tick(); }, "O3CPU tick",
false, Event::CPU_Tick_Pri),
threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads",
threadExitEvent([this]{ exitThreads(); }, "O3CPU exit threads",
false, Event::CPU_Exit_Pri),
#ifndef NDEBUG
instcount(0),
@@ -130,7 +133,7 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
if (params.checker) {
BaseCPU *temp_checker = params.checker;
checker = dynamic_cast<Checker<O3DynInstPtr> *>(temp_checker);
checker = dynamic_cast<Checker<DynInstPtr> *>(temp_checker);
checker->setIcachePort(&fetch.getInstPort());
checker->setSystem(params.system);
} else {
@@ -144,7 +147,7 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
// The stages also need their CPU pointer setup. However this
// must be done at the upper level CPU because they have pointers
// to the upper level CPU, and not this FullO3CPU.
// to the upper level CPU, and not this CPU.
// Set up Pointers to the activeThreads list for each stage
fetch.setActiveThreads(&activeThreads);
@@ -182,8 +185,8 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
} else {
active_threads = params.workload.size();
if (active_threads > O3MaxThreads) {
panic("Workload Size too large. Increase the 'O3MaxThreads' "
if (active_threads > MaxThreads) {
panic("Workload Size too large. Increase the 'MaxThreads' "
"constant in cpu/o3/limits.hh or edit your workload size.");
}
}
@@ -303,33 +306,32 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
if (FullSystem) {
// SMT is not supported in FS mode yet.
assert(numThreads == 1);
thread[tid] = new O3ThreadState(this, 0, NULL);
thread[tid] = new ThreadState(this, 0, NULL);
} else {
if (tid < params.workload.size()) {
DPRINTF(O3CPU, "Workload[%i] process is %#x", tid,
thread[tid]);
thread[tid] = new O3ThreadState(this, tid,
params.workload[tid]);
thread[tid] = new ThreadState(this, tid, params.workload[tid]);
} else {
//Allocate Empty thread so M5 can use later
//when scheduling threads to CPU
Process* dummy_proc = NULL;
thread[tid] = new O3ThreadState(this, tid, dummy_proc);
thread[tid] = new ThreadState(this, tid, dummy_proc);
}
}
ThreadContext *tc;
::ThreadContext *tc;
// Setup the TC that will serve as the interface to the threads/CPU.
O3ThreadContext *o3_tc = new O3ThreadContext;
auto *o3_tc = new ThreadContext;
tc = o3_tc;
// If we're using a checker, then the TC should be the
// CheckerThreadContext.
if (params.checker) {
tc = new CheckerThreadContext<O3ThreadContext>(o3_tc, checker);
tc = new CheckerThreadContext<ThreadContext>(o3_tc, checker);
}
o3_tc->cpu = this;
@@ -342,9 +344,9 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
threadContexts.push_back(tc);
}
// FullO3CPU always requires an interrupt controller.
// O3CPU always requires an interrupt controller.
if (!params.switched_out && interrupts.empty()) {
fatal("FullO3CPU %s has no interrupt controller.\n"
fatal("O3CPU %s has no interrupt controller.\n"
"Ensure createInterruptController() is called.\n", name());
}
@@ -353,14 +355,14 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams &params)
}
void
FullO3CPU::regProbePoints()
CPU::regProbePoints()
{
BaseCPU::regProbePoints();
ppInstAccessComplete = new ProbePointArg<PacketPtr>(
getProbeManager(), "InstAccessComplete");
ppDataAccessComplete = new ProbePointArg<
std::pair<O3DynInstPtr, PacketPtr>>(
std::pair<DynInstPtr, PacketPtr>>(
getProbeManager(), "DataAccessComplete");
fetch.regProbePoints();
@@ -369,7 +371,7 @@ FullO3CPU::regProbePoints()
commit.regProbePoints();
}
FullO3CPU::FullO3CPUStats::FullO3CPUStats(FullO3CPU *cpu)
CPU::CPUStats::CPUStats(CPU *cpu)
: Stats::Group(cpu),
ADD_STAT(timesIdled, Stats::Units::Count::get(),
"Number of times that the entire CPU went into an idle state "
@@ -434,7 +436,7 @@ FullO3CPU::FullO3CPUStats::FullO3CPUStats(FullO3CPU *cpu)
// Number of Instructions simulated
// --------------------------------
// Should probably be in Base CPU but need templated
// O3MaxThreads so put in here instead
// MaxThreads so put in here instead
committedInsts
.init(cpu->numThreads)
.flags(Stats::total);
@@ -497,9 +499,9 @@ FullO3CPU::FullO3CPUStats::FullO3CPUStats(FullO3CPU *cpu)
}
void
FullO3CPU::tick()
CPU::tick()
{
DPRINTF(O3CPU, "\n\nFullO3CPU: Ticking main, FullO3CPU.\n");
DPRINTF(O3CPU, "\n\nO3CPU: Ticking main, O3CPU.\n");
assert(!switchedOut());
assert(drainState() != DrainState::Drained);
@@ -555,7 +557,7 @@ FullO3CPU::tick()
}
void
FullO3CPU::init()
CPU::init()
{
BaseCPU::init();
@@ -575,7 +577,7 @@ FullO3CPU::init()
}
void
FullO3CPU::startup()
CPU::startup()
{
BaseCPU::startup();
@@ -587,7 +589,7 @@ FullO3CPU::startup()
}
void
FullO3CPU::activateThread(ThreadID tid)
CPU::activateThread(ThreadID tid)
{
std::list<ThreadID>::iterator isActive =
std::find(activeThreads.begin(), activeThreads.end(), tid);
@@ -596,15 +598,14 @@ FullO3CPU::activateThread(ThreadID tid)
assert(!switchedOut());
if (isActive == activeThreads.end()) {
DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n",
tid);
DPRINTF(O3CPU, "[tid:%i] Adding to active threads list\n", tid);
activeThreads.push_back(tid);
}
}
void
FullO3CPU::deactivateThread(ThreadID tid)
CPU::deactivateThread(ThreadID tid)
{
// hardware transactional memory
// shouldn't deactivate thread in the middle of a transaction
@@ -628,7 +629,7 @@ FullO3CPU::deactivateThread(ThreadID tid)
}
Counter
FullO3CPU::totalInsts() const
CPU::totalInsts() const
{
Counter total(0);
@@ -640,7 +641,7 @@ FullO3CPU::totalInsts() const
}
Counter
FullO3CPU::totalOps() const
CPU::totalOps() const
{
Counter total(0);
@@ -652,7 +653,7 @@ FullO3CPU::totalOps() const
}
void
FullO3CPU::activateContext(ThreadID tid)
CPU::activateContext(ThreadID tid)
{
assert(!switchedOut());
@@ -690,7 +691,7 @@ FullO3CPU::activateContext(ThreadID tid)
}
void
FullO3CPU::suspendContext(ThreadID tid)
CPU::suspendContext(ThreadID tid)
{
DPRINTF(O3CPU,"[tid:%i] Suspending Thread Context.\n", tid);
assert(!switchedOut());
@@ -710,7 +711,7 @@ FullO3CPU::suspendContext(ThreadID tid)
}
void
FullO3CPU::haltContext(ThreadID tid)
CPU::haltContext(ThreadID tid)
{
//For now, this is the same as deallocate
DPRINTF(O3CPU,"[tid:%i] Halt Context called. Deallocating\n", tid);
@@ -732,12 +733,12 @@ FullO3CPU::haltContext(ThreadID tid)
}
void
FullO3CPU::insertThread(ThreadID tid)
CPU::insertThread(ThreadID tid)
{
DPRINTF(O3CPU,"[tid:%i] Initializing thread into CPU");
// Will change now that the PC and thread state is internal to the CPU
// and not in the ThreadContext.
ThreadContext *src_tc;
::ThreadContext *src_tc;
if (FullSystem)
src_tc = system->threads[tid];
else
@@ -772,7 +773,7 @@ FullO3CPU::insertThread(ThreadID tid)
//Set PC/NPC/NNPC
pcState(src_tc->pcState(), tid);
src_tc->setStatus(ThreadContext::Active);
src_tc->setStatus(::ThreadContext::Active);
activateContext(tid);
@@ -781,7 +782,7 @@ FullO3CPU::insertThread(ThreadID tid)
}
void
FullO3CPU::removeThread(ThreadID tid)
CPU::removeThread(ThreadID tid)
{
DPRINTF(O3CPU,"[tid:%i] Removing thread context from CPU.\n", tid);
@@ -833,7 +834,7 @@ FullO3CPU::removeThread(ThreadID tid)
}
void
FullO3CPU::setVectorsAsReady(ThreadID tid)
CPU::setVectorsAsReady(ThreadID tid)
{
const auto &regClasses = isa[tid]->regClasses();
@@ -856,7 +857,7 @@ FullO3CPU::setVectorsAsReady(ThreadID tid)
}
void
FullO3CPU::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
CPU::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
{
auto pc = pcState(tid);
@@ -875,14 +876,14 @@ FullO3CPU::switchRenameMode(ThreadID tid, UnifiedFreeList* freelist)
}
Fault
FullO3CPU::getInterrupts()
CPU::getInterrupts()
{
// Check if there are any outstanding interrupts
return interrupts[0]->getInterrupt();
}
void
FullO3CPU::processInterrupts(const Fault &interrupt)
CPU::processInterrupts(const Fault &interrupt)
{
// Check for interrupts here. For now can copy the code that
// exists within isa_fullsys_traits.hh. Also assume that thread 0
@@ -898,26 +899,26 @@ FullO3CPU::processInterrupts(const Fault &interrupt)
}
void
FullO3CPU::trap(const Fault &fault, ThreadID tid, const StaticInstPtr &inst)
CPU::trap(const Fault &fault, ThreadID tid, const StaticInstPtr &inst)
{
// Pass the thread's TC into the invoke method.
fault->invoke(threadContexts[tid], inst);
}
void
FullO3CPU::serializeThread(CheckpointOut &cp, ThreadID tid) const
CPU::serializeThread(CheckpointOut &cp, ThreadID tid) const
{
thread[tid]->serialize(cp);
}
void
FullO3CPU::unserializeThread(CheckpointIn &cp, ThreadID tid)
CPU::unserializeThread(CheckpointIn &cp, ThreadID tid)
{
thread[tid]->unserialize(cp);
}
DrainState
FullO3CPU::drain()
CPU::drain()
{
// Deschedule any power gating event (if any)
deschedulePowerGatingEvent();
@@ -941,7 +942,7 @@ FullO3CPU::drain()
if (!isCpuDrained()) {
// If a thread is suspended, wake it up so it can be drained
for (auto t : threadContexts) {
if (t->status() == ThreadContext::Suspended){
if (t->status() == ::ThreadContext::Suspended){
DPRINTF(Drain, "Currently suspended so activate %i \n",
t->threadId());
t->activate();
@@ -979,7 +980,7 @@ FullO3CPU::drain()
}
bool
FullO3CPU::tryDrain()
CPU::tryDrain()
{
if (drainState() != DrainState::Draining || !isCpuDrained())
return false;
@@ -994,7 +995,7 @@ FullO3CPU::tryDrain()
}
void
FullO3CPU::drainSanityCheck() const
CPU::drainSanityCheck() const
{
assert(isCpuDrained());
fetch.drainSanityCheck();
@@ -1005,7 +1006,7 @@ FullO3CPU::drainSanityCheck() const
}
bool
FullO3CPU::isCpuDrained() const
CPU::isCpuDrained() const
{
bool drained(true);
@@ -1042,14 +1043,10 @@ FullO3CPU::isCpuDrained() const
return drained;
}
void
FullO3CPU::commitDrained(ThreadID tid)
{
fetch.drainStall(tid);
}
void CPU::commitDrained(ThreadID tid) { fetch.drainStall(tid); }
void
FullO3CPU::drainResume()
CPU::drainResume()
{
if (switchedOut())
return;
@@ -1062,7 +1059,7 @@ FullO3CPU::drainResume()
_status = Idle;
for (ThreadID i = 0; i < thread.size(); i++) {
if (thread[i]->status() == ThreadContext::Active) {
if (thread[i]->status() == ::ThreadContext::Active) {
DPRINTF(Drain, "Activating thread: %i\n", i);
activateThread(i);
_status = Running;
@@ -1078,7 +1075,7 @@ FullO3CPU::drainResume()
}
void
FullO3CPU::switchOut()
CPU::switchOut()
{
DPRINTF(O3CPU, "Switching out\n");
BaseCPU::switchOut();
@@ -1092,7 +1089,7 @@ FullO3CPU::switchOut()
}
void
FullO3CPU::takeOverFrom(BaseCPU *oldCPU)
CPU::takeOverFrom(BaseCPU *oldCPU)
{
BaseCPU::takeOverFrom(oldCPU);
@@ -1104,7 +1101,7 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU)
assert(!tickEvent.scheduled());
auto *oldO3CPU = dynamic_cast<FullO3CPU *>(oldCPU);
auto *oldO3CPU = dynamic_cast<CPU *>(oldCPU);
if (oldO3CPU)
globalSeqNum = oldO3CPU->globalSeqNum;
@@ -1113,7 +1110,7 @@ FullO3CPU::takeOverFrom(BaseCPU *oldCPU)
}
void
FullO3CPU::verifyMemoryMode() const
CPU::verifyMemoryMode() const
{
if (!system->isTimingMode()) {
fatal("The O3 CPU requires the memory system to be in "
@@ -1122,117 +1119,117 @@ FullO3CPU::verifyMemoryMode() const
}
RegVal
FullO3CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
{
return isa[tid]->readMiscRegNoEffect(misc_reg);
}
RegVal
FullO3CPU::readMiscReg(int misc_reg, ThreadID tid)
CPU::readMiscReg(int misc_reg, ThreadID tid)
{
cpuStats.miscRegfileReads++;
return isa[tid]->readMiscReg(misc_reg);
}
void
FullO3CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
{
isa[tid]->setMiscRegNoEffect(misc_reg, val);
}
void
FullO3CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
{
cpuStats.miscRegfileWrites++;
isa[tid]->setMiscReg(misc_reg, val);
}
RegVal
FullO3CPU::readIntReg(PhysRegIdPtr phys_reg)
CPU::readIntReg(PhysRegIdPtr phys_reg)
{
cpuStats.intRegfileReads++;
return regFile.readIntReg(phys_reg);
}
RegVal
FullO3CPU::readFloatReg(PhysRegIdPtr phys_reg)
CPU::readFloatReg(PhysRegIdPtr phys_reg)
{
cpuStats.fpRegfileReads++;
return regFile.readFloatReg(phys_reg);
}
const TheISA::VecRegContainer&
FullO3CPU::readVecReg(PhysRegIdPtr phys_reg) const
CPU::readVecReg(PhysRegIdPtr phys_reg) const
{
cpuStats.vecRegfileReads++;
return regFile.readVecReg(phys_reg);
}
TheISA::VecRegContainer&
FullO3CPU::getWritableVecReg(PhysRegIdPtr phys_reg)
CPU::getWritableVecReg(PhysRegIdPtr phys_reg)
{
cpuStats.vecRegfileWrites++;
return regFile.getWritableVecReg(phys_reg);
}
const TheISA::VecElem&
FullO3CPU::readVecElem(PhysRegIdPtr phys_reg) const
CPU::readVecElem(PhysRegIdPtr phys_reg) const
{
cpuStats.vecRegfileReads++;
return regFile.readVecElem(phys_reg);
}
const TheISA::VecPredRegContainer&
FullO3CPU::readVecPredReg(PhysRegIdPtr phys_reg) const
CPU::readVecPredReg(PhysRegIdPtr phys_reg) const
{
cpuStats.vecPredRegfileReads++;
return regFile.readVecPredReg(phys_reg);
}
TheISA::VecPredRegContainer&
FullO3CPU::getWritableVecPredReg(PhysRegIdPtr phys_reg)
CPU::getWritableVecPredReg(PhysRegIdPtr phys_reg)
{
cpuStats.vecPredRegfileWrites++;
return regFile.getWritableVecPredReg(phys_reg);
}
RegVal
FullO3CPU::readCCReg(PhysRegIdPtr phys_reg)
CPU::readCCReg(PhysRegIdPtr phys_reg)
{
cpuStats.ccRegfileReads++;
return regFile.readCCReg(phys_reg);
}
void
FullO3CPU::setIntReg(PhysRegIdPtr phys_reg, RegVal val)
CPU::setIntReg(PhysRegIdPtr phys_reg, RegVal val)
{
cpuStats.intRegfileWrites++;
regFile.setIntReg(phys_reg, val);
}
void
FullO3CPU::setFloatReg(PhysRegIdPtr phys_reg, RegVal val)
CPU::setFloatReg(PhysRegIdPtr phys_reg, RegVal val)
{
cpuStats.fpRegfileWrites++;
regFile.setFloatReg(phys_reg, val);
}
void
FullO3CPU::setVecReg(PhysRegIdPtr phys_reg, const TheISA::VecRegContainer& val)
CPU::setVecReg(PhysRegIdPtr phys_reg, const TheISA::VecRegContainer& val)
{
cpuStats.vecRegfileWrites++;
regFile.setVecReg(phys_reg, val);
}
void
FullO3CPU::setVecElem(PhysRegIdPtr phys_reg, const TheISA::VecElem& val)
CPU::setVecElem(PhysRegIdPtr phys_reg, const TheISA::VecElem& val)
{
cpuStats.vecRegfileWrites++;
regFile.setVecElem(phys_reg, val);
}
void
FullO3CPU::setVecPredReg(PhysRegIdPtr phys_reg,
CPU::setVecPredReg(PhysRegIdPtr phys_reg,
const TheISA::VecPredRegContainer& val)
{
cpuStats.vecPredRegfileWrites++;
@@ -1240,14 +1237,14 @@ FullO3CPU::setVecPredReg(PhysRegIdPtr phys_reg,
}
void
FullO3CPU::setCCReg(PhysRegIdPtr phys_reg, RegVal val)
CPU::setCCReg(PhysRegIdPtr phys_reg, RegVal val)
{
cpuStats.ccRegfileWrites++;
regFile.setCCReg(phys_reg, val);
}
RegVal
FullO3CPU::readArchIntReg(int reg_idx, ThreadID tid)
CPU::readArchIntReg(int reg_idx, ThreadID tid)
{
cpuStats.intRegfileReads++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1257,7 +1254,7 @@ FullO3CPU::readArchIntReg(int reg_idx, ThreadID tid)
}
RegVal
FullO3CPU::readArchFloatReg(int reg_idx, ThreadID tid)
CPU::readArchFloatReg(int reg_idx, ThreadID tid)
{
cpuStats.fpRegfileReads++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1267,7 +1264,7 @@ FullO3CPU::readArchFloatReg(int reg_idx, ThreadID tid)
}
const TheISA::VecRegContainer&
FullO3CPU::readArchVecReg(int reg_idx, ThreadID tid) const
CPU::readArchVecReg(int reg_idx, ThreadID tid) const
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
RegId(VecRegClass, reg_idx));
@@ -1275,7 +1272,7 @@ FullO3CPU::readArchVecReg(int reg_idx, ThreadID tid) const
}
TheISA::VecRegContainer&
FullO3CPU::getWritableArchVecReg(int reg_idx, ThreadID tid)
CPU::getWritableArchVecReg(int reg_idx, ThreadID tid)
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
RegId(VecRegClass, reg_idx));
@@ -1283,7 +1280,7 @@ FullO3CPU::getWritableArchVecReg(int reg_idx, ThreadID tid)
}
const TheISA::VecElem&
FullO3CPU::readArchVecElem(
CPU::readArchVecElem(
const RegIndex& reg_idx, const ElemIndex& ldx, ThreadID tid) const
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1292,7 +1289,7 @@ FullO3CPU::readArchVecElem(
}
const TheISA::VecPredRegContainer&
FullO3CPU::readArchVecPredReg(int reg_idx, ThreadID tid) const
CPU::readArchVecPredReg(int reg_idx, ThreadID tid) const
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
RegId(VecPredRegClass, reg_idx));
@@ -1300,7 +1297,7 @@ FullO3CPU::readArchVecPredReg(int reg_idx, ThreadID tid) const
}
TheISA::VecPredRegContainer&
FullO3CPU::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
CPU::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
RegId(VecPredRegClass, reg_idx));
@@ -1308,7 +1305,7 @@ FullO3CPU::getWritableArchVecPredReg(int reg_idx, ThreadID tid)
}
RegVal
FullO3CPU::readArchCCReg(int reg_idx, ThreadID tid)
CPU::readArchCCReg(int reg_idx, ThreadID tid)
{
cpuStats.ccRegfileReads++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1318,7 +1315,7 @@ FullO3CPU::readArchCCReg(int reg_idx, ThreadID tid)
}
void
FullO3CPU::setArchIntReg(int reg_idx, RegVal val, ThreadID tid)
CPU::setArchIntReg(int reg_idx, RegVal val, ThreadID tid)
{
cpuStats.intRegfileWrites++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1328,7 +1325,7 @@ FullO3CPU::setArchIntReg(int reg_idx, RegVal val, ThreadID tid)
}
void
FullO3CPU::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid)
CPU::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid)
{
cpuStats.fpRegfileWrites++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1338,7 +1335,7 @@ FullO3CPU::setArchFloatReg(int reg_idx, RegVal val, ThreadID tid)
}
void
FullO3CPU::setArchVecReg(int reg_idx, const TheISA::VecRegContainer& val,
CPU::setArchVecReg(int reg_idx, const TheISA::VecRegContainer& val,
ThreadID tid)
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1347,7 +1344,7 @@ FullO3CPU::setArchVecReg(int reg_idx, const TheISA::VecRegContainer& val,
}
void
FullO3CPU::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
CPU::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
const TheISA::VecElem& val, ThreadID tid)
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1356,7 +1353,7 @@ FullO3CPU::setArchVecElem(const RegIndex& reg_idx, const ElemIndex& ldx,
}
void
FullO3CPU::setArchVecPredReg(int reg_idx,
CPU::setArchVecPredReg(int reg_idx,
const TheISA::VecPredRegContainer& val, ThreadID tid)
{
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1365,7 +1362,7 @@ FullO3CPU::setArchVecPredReg(int reg_idx,
}
void
FullO3CPU::setArchCCReg(int reg_idx, RegVal val, ThreadID tid)
CPU::setArchCCReg(int reg_idx, RegVal val, ThreadID tid)
{
cpuStats.ccRegfileWrites++;
PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
@@ -1375,44 +1372,44 @@ FullO3CPU::setArchCCReg(int reg_idx, RegVal val, ThreadID tid)
}
TheISA::PCState
FullO3CPU::pcState(ThreadID tid)
CPU::pcState(ThreadID tid)
{
return commit.pcState(tid);
}
void
FullO3CPU::pcState(const TheISA::PCState &val, ThreadID tid)
CPU::pcState(const TheISA::PCState &val, ThreadID tid)
{
commit.pcState(val, tid);
}
Addr
FullO3CPU::instAddr(ThreadID tid)
CPU::instAddr(ThreadID tid)
{
return commit.instAddr(tid);
}
Addr
FullO3CPU::nextInstAddr(ThreadID tid)
CPU::nextInstAddr(ThreadID tid)
{
return commit.nextInstAddr(tid);
}
MicroPC
FullO3CPU::microPC(ThreadID tid)
CPU::microPC(ThreadID tid)
{
return commit.microPC(tid);
}
void
FullO3CPU::squashFromTC(ThreadID tid)
CPU::squashFromTC(ThreadID tid)
{
thread[tid]->noSquashFromTC = true;
commit.generateTCEvent(tid);
}
FullO3CPU::ListIt
FullO3CPU::addInst(const O3DynInstPtr &inst)
CPU::ListIt
CPU::addInst(const DynInstPtr &inst)
{
instList.push_back(inst);
@@ -1420,7 +1417,7 @@ FullO3CPU::addInst(const O3DynInstPtr &inst)
}
void
FullO3CPU::instDone(ThreadID tid, const O3DynInstPtr &inst)
CPU::instDone(ThreadID tid, const DynInstPtr &inst)
{
// Keep an instruction count.
if (!inst->isMicroop() || inst->isLastMicroop()) {
@@ -1439,7 +1436,7 @@ FullO3CPU::instDone(ThreadID tid, const O3DynInstPtr &inst)
}
void
FullO3CPU::removeFrontInst(const O3DynInstPtr &inst)
CPU::removeFrontInst(const DynInstPtr &inst)
{
DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s "
"[sn:%lli]\n",
@@ -1452,7 +1449,7 @@ FullO3CPU::removeFrontInst(const O3DynInstPtr &inst)
}
void
FullO3CPU::removeInstsNotInROB(ThreadID tid)
CPU::removeInstsNotInROB(ThreadID tid)
{
DPRINTF(O3CPU, "Thread %i: Deleting instructions from instruction"
" list.\n", tid);
@@ -1496,7 +1493,7 @@ FullO3CPU::removeInstsNotInROB(ThreadID tid)
}
void
FullO3CPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
CPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
{
assert(!instList.empty());
@@ -1524,7 +1521,7 @@ FullO3CPU::removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid)
}
void
FullO3CPU::squashInstIt(const ListIt &instIt, ThreadID tid)
CPU::squashInstIt(const ListIt &instIt, ThreadID tid)
{
if ((*instIt)->threadNumber == tid) {
DPRINTF(O3CPU, "Squashing instruction, "
@@ -1544,7 +1541,7 @@ FullO3CPU::squashInstIt(const ListIt &instIt, ThreadID tid)
}
void
FullO3CPU::cleanUpRemovedInsts()
CPU::cleanUpRemovedInsts()
{
while (!removeList.empty()) {
DPRINTF(O3CPU, "Removing instruction, "
@@ -1562,13 +1559,13 @@ FullO3CPU::cleanUpRemovedInsts()
}
/*
void
FullO3CPU::removeAllInsts()
CPU::removeAllInsts()
{
instList.clear();
}
*/
void
FullO3CPU::dumpInsts()
CPU::dumpInsts()
{
int num = 0;
@@ -1588,13 +1585,13 @@ FullO3CPU::dumpInsts()
}
/*
void
FullO3CPU::wakeDependents(const O3DynInstPtr &inst)
CPU::wakeDependents(const DynInstPtr &inst)
{
iew.wakeDependents(inst);
}
*/
void
FullO3CPU::wakeCPU()
CPU::wakeCPU()
{
if (activityRec.active() || tickEvent.scheduled()) {
DPRINTF(Activity, "CPU already running.\n");
@@ -1615,9 +1612,9 @@ FullO3CPU::wakeCPU()
}
void
FullO3CPU::wakeup(ThreadID tid)
CPU::wakeup(ThreadID tid)
{
if (thread[tid]->status() != ThreadContext::Suspended)
if (thread[tid]->status() != ::ThreadContext::Suspended)
return;
wakeCPU();
@@ -1627,7 +1624,7 @@ FullO3CPU::wakeup(ThreadID tid)
}
ThreadID
FullO3CPU::getFreeTid()
CPU::getFreeTid()
{
for (ThreadID tid = 0; tid < numThreads; tid++) {
if (!tids[tid]) {
@@ -1640,7 +1637,7 @@ FullO3CPU::getFreeTid()
}
void
FullO3CPU::updateThreadPriority()
CPU::updateThreadPriority()
{
if (activeThreads.size() > 1) {
//DEFAULT TO ROUND ROBIN SCHEME
@@ -1656,12 +1653,12 @@ FullO3CPU::updateThreadPriority()
}
void
FullO3CPU::addThreadToExitingList(ThreadID tid)
CPU::addThreadToExitingList(ThreadID tid)
{
DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid);
// the thread trying to exit can't be already halted
assert(tcBase(tid)->status() != ThreadContext::Halted);
assert(tcBase(tid)->status() != ::ThreadContext::Halted);
// make sure the thread has not been added to the list yet
assert(exitingThreads.count(tid) == 0);
@@ -1675,13 +1672,13 @@ FullO3CPU::addThreadToExitingList(ThreadID tid)
}
bool
FullO3CPU::isThreadExiting(ThreadID tid) const
CPU::isThreadExiting(ThreadID tid) const
{
return exitingThreads.count(tid) == 1;
}
void
FullO3CPU::scheduleThreadExitEvent(ThreadID tid)
CPU::scheduleThreadExitEvent(ThreadID tid)
{
assert(exitingThreads.count(tid) == 1);
@@ -1701,7 +1698,7 @@ FullO3CPU::scheduleThreadExitEvent(ThreadID tid)
}
void
FullO3CPU::exitThreads()
CPU::exitThreads()
{
// there must be at least one thread trying to exit
assert(exitingThreads.size() > 0);
@@ -1715,7 +1712,7 @@ FullO3CPU::exitThreads()
if (readyToExit) {
DPRINTF(O3CPU, "Exiting thread %d\n", thread_id);
haltContext(thread_id);
tcBase(thread_id)->setStatus(ThreadContext::Halted);
tcBase(thread_id)->setStatus(::ThreadContext::Halted);
it = exitingThreads.erase(it);
} else {
it++;
@@ -1724,8 +1721,8 @@ FullO3CPU::exitThreads()
}
void
FullO3CPU::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid,
HtmFailureFaultCause cause)
CPU::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid,
HtmFailureFaultCause cause)
{
const Addr addr = 0x0ul;
const int size = 8;
@@ -1757,3 +1754,5 @@ FullO3CPU::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid,
panic("HTM abort signal was not sent to the memory subsystem.");
}
}
} // namespace o3

View File

@@ -69,28 +69,32 @@
#include "cpu/base.hh"
#include "cpu/simple_thread.hh"
#include "cpu/timebuf.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
#include "sim/process.hh"
template <class>
class Checker;
class ThreadContext;
class O3ThreadContext;
class Checkpoint;
class Process;
namespace o3
{
class ThreadContext;
/**
* FullO3CPU class, has each of the stages (fetch through commit)
* O3CPU class, has each of the stages (fetch through commit)
* within it, as well as all of the time buffers between stages. The
* tick() function for the CPU is defined here.
*/
class FullO3CPU : public BaseCPU
class CPU : public BaseCPU
{
public:
typedef std::list<O3DynInstPtr>::iterator ListIt;
typedef std::list<DynInstPtr>::iterator ListIt;
friend class O3ThreadContext;
friend class ThreadContext;
public:
enum Status
@@ -161,10 +165,10 @@ class FullO3CPU : public BaseCPU
public:
/** Constructs a CPU with the given parameters. */
FullO3CPU(const DerivO3CPUParams &params);
CPU(const O3CPUParams &params);
ProbePointArg<PacketPtr> *ppInstAccessComplete;
ProbePointArg<std::pair<O3DynInstPtr, PacketPtr> > *ppDataAccessComplete;
ProbePointArg<std::pair<DynInstPtr, PacketPtr> > *ppDataAccessComplete;
/** Register probe points. */
void regProbePoints() override;
@@ -419,15 +423,15 @@ class FullO3CPU : public BaseCPU
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
*/
ListIt addInst(const O3DynInstPtr &inst);
ListIt addInst(const DynInstPtr &inst);
/** Function to tell the CPU that an instruction has completed. */
void instDone(ThreadID tid, const O3DynInstPtr &inst);
void instDone(ThreadID tid, const DynInstPtr &inst);
/** Remove an instruction from the front end of the list. There's
* no restriction on location of the instruction.
*/
void removeFrontInst(const O3DynInstPtr &inst);
void removeFrontInst(const DynInstPtr &inst);
/** Remove all instructions that are not currently in the ROB.
* There's also an option to not squash delay slot instructions.*/
@@ -452,7 +456,7 @@ class FullO3CPU : public BaseCPU
#endif
/** List of all the instructions in flight. */
std::list<O3DynInstPtr> instList;
std::list<DynInstPtr> instList;
/** List of all the instructions that will be removed at the end of this
* cycle.
@@ -473,19 +477,19 @@ class FullO3CPU : public BaseCPU
protected:
/** The fetch stage. */
DefaultFetch fetch;
Fetch fetch;
/** The decode stage. */
DefaultDecode decode;
Decode decode;
/** The dispatch stage. */
DefaultRename rename;
Rename rename;
/** The issue/execute/writeback stages. */
DefaultIEW iew;
IEW iew;
/** The commit stage. */
DefaultCommit commit;
Commit commit;
/** The rename mode of the vector registers */
Enums::VecRegRenameMode vecMode;
@@ -497,10 +501,10 @@ class FullO3CPU : public BaseCPU
UnifiedFreeList freeList;
/** The rename map. */
UnifiedRenameMap renameMap[O3MaxThreads];
UnifiedRenameMap renameMap[MaxThreads];
/** The commit rename map. */
UnifiedRenameMap commitRenameMap[O3MaxThreads];
UnifiedRenameMap commitRenameMap[MaxThreads];
/** The re-order buffer. */
ROB rob;
@@ -536,19 +540,19 @@ class FullO3CPU : public BaseCPU
};
/** The main time buffer to do backwards communication. */
TimeBuffer<O3Comm::TimeStruct> timeBuffer;
TimeBuffer<TimeStruct> timeBuffer;
/** The fetch stage's instruction queue. */
TimeBuffer<O3Comm::FetchStruct> fetchQueue;
TimeBuffer<FetchStruct> fetchQueue;
/** The decode stage's instruction queue. */
TimeBuffer<O3Comm::DecodeStruct> decodeQueue;
TimeBuffer<DecodeStruct> decodeQueue;
/** The rename stage's instruction queue. */
TimeBuffer<O3Comm::RenameStruct> renameQueue;
TimeBuffer<RenameStruct> renameQueue;
/** The IEW stage's instruction queue. */
TimeBuffer<O3Comm::IEWStruct> iewQueue;
TimeBuffer<IEWStruct> iewQueue;
private:
/** The activity recorder; used to tell if the CPU has any
@@ -579,26 +583,26 @@ class FullO3CPU : public BaseCPU
public:
/** Returns a pointer to a thread context. */
ThreadContext *
::ThreadContext *
tcBase(ThreadID tid)
{
return thread[tid]->getTC();
}
/** The global sequence number counter. */
InstSeqNum globalSeqNum;//[O3MaxThreads];
InstSeqNum globalSeqNum;//[MaxThreads];
/** Pointer to the checker, which can dynamically verify
* instruction results at run time. This can be set to NULL if it
* is not being used.
*/
Checker<O3DynInstPtr> *checker;
::Checker<DynInstPtr> *checker;
/** Pointer to the system. */
System *system;
/** Pointers to all of the threads in the CPU. */
std::vector<O3ThreadState *> thread;
std::vector<ThreadState *> thread;
/** Threads Scheduled to Enter CPU */
std::list<int> cpuWaitList;
@@ -616,7 +620,7 @@ class FullO3CPU : public BaseCPU
std::vector<ThreadID> tids;
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op = nullptr,
const std::vector<bool>& byte_enable =
@@ -653,9 +657,9 @@ class FullO3CPU : public BaseCPU
return iew.ldstQueue.getDataPort();
}
struct FullO3CPUStats : public Stats::Group
struct CPUStats : public Stats::Group
{
FullO3CPUStats(FullO3CPU *cpu);
CPUStats(CPU *cpu);
/** Stat for total number of times the CPU is descheduled. */
Stats::Scalar timesIdled;
@@ -704,4 +708,6 @@ class FullO3CPU : public BaseCPU
HtmFailureFaultCause cause);
};
} // namespace o3
#endif // __CPU_O3_CPU_HH__

View File

@@ -49,14 +49,17 @@
#include "debug/Activity.hh"
#include "debug/Decode.hh"
#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
#include "sim/full_system.hh"
// clang complains about std::set being overloaded with Packet::set if
// we open up the entire namespace std
using std::list;
DefaultDecode::DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams &params)
namespace o3
{
Decode::Decode(CPU *_cpu, const O3CPUParams &params)
: cpu(_cpu),
renameToDecodeDelay(params.renameToDecodeDelay),
iewToDecodeDelay(params.iewToDecodeDelay),
@@ -66,14 +69,14 @@ DefaultDecode::DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams &params)
numThreads(params.numThreads),
stats(_cpu)
{
if (decodeWidth > O3MaxWidth)
if (decodeWidth > MaxWidth)
fatal("decodeWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
decodeWidth, static_cast<int>(O3MaxWidth));
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
decodeWidth, static_cast<int>(MaxWidth));
// @todo: Make into a parameter
skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth;
for (int tid = 0; tid < O3MaxThreads; tid++) {
for (int tid = 0; tid < MaxThreads; tid++) {
stalls[tid] = {false};
decodeStatus[tid] = Idle;
bdelayDoneSeqNum[tid] = 0;
@@ -83,20 +86,20 @@ DefaultDecode::DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
void
DefaultDecode::startupStage()
Decode::startupStage()
{
resetStage();
}
void
DefaultDecode::clearStates(ThreadID tid)
Decode::clearStates(ThreadID tid)
{
decodeStatus[tid] = Idle;
stalls[tid].rename = false;
}
void
DefaultDecode::resetStage()
Decode::resetStage()
{
_status = Inactive;
@@ -109,12 +112,12 @@ DefaultDecode::resetStage()
}
std::string
DefaultDecode::name() const
Decode::name() const
{
return cpu->name() + ".decode";
}
DefaultDecode::DecodeStats::DecodeStats(FullO3CPU *cpu)
Decode::DecodeStats::DecodeStats(CPU *cpu)
: Stats::Group(cpu, "decode"),
ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
"Number of cycles decode is idle"),
@@ -151,7 +154,7 @@ DefaultDecode::DecodeStats::DecodeStats(FullO3CPU *cpu)
}
void
DefaultDecode::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
Decode::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
@@ -165,7 +168,7 @@ DefaultDecode::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
}
void
DefaultDecode::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
Decode::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
decodeQueue = dq_ptr;
@@ -174,7 +177,7 @@ DefaultDecode::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
}
void
DefaultDecode::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
Decode::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
fetchQueue = fq_ptr;
@@ -183,13 +186,13 @@ DefaultDecode::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
}
void
DefaultDecode::setActiveThreads(std::list<ThreadID> *at_ptr)
Decode::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
void
DefaultDecode::drainSanityCheck() const
Decode::drainSanityCheck() const
{
for (ThreadID tid = 0; tid < numThreads; ++tid) {
assert(insts[tid].empty());
@@ -198,7 +201,7 @@ DefaultDecode::drainSanityCheck() const
}
bool
DefaultDecode::isDrained() const
Decode::isDrained() const
{
for (ThreadID tid = 0; tid < numThreads; ++tid) {
if (!insts[tid].empty() || !skidBuffer[tid].empty() ||
@@ -209,7 +212,7 @@ DefaultDecode::isDrained() const
}
bool
DefaultDecode::checkStall(ThreadID tid) const
Decode::checkStall(ThreadID tid) const
{
bool ret_val = false;
@@ -222,13 +225,13 @@ DefaultDecode::checkStall(ThreadID tid) const
}
bool
DefaultDecode::fetchInstsValid()
Decode::fetchInstsValid()
{
return fromFetch->size > 0;
}
bool
DefaultDecode::block(ThreadID tid)
Decode::block(ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] Blocking.\n", tid);
@@ -257,7 +260,7 @@ DefaultDecode::block(ThreadID tid)
}
bool
DefaultDecode::unblock(ThreadID tid)
Decode::unblock(ThreadID tid)
{
// Decode is done unblocking only if the skid buffer is empty.
if (skidBuffer[tid].empty()) {
@@ -275,7 +278,7 @@ DefaultDecode::unblock(ThreadID tid)
}
void
DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid)
Decode::squash(const DynInstPtr &inst, ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
"prediction detected at decode.\n", tid, inst->seqNum);
@@ -326,7 +329,7 @@ DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid)
}
unsigned
DefaultDecode::squash(ThreadID tid)
Decode::squash(ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] Squashing.\n",tid);
@@ -373,9 +376,9 @@ DefaultDecode::squash(ThreadID tid)
}
void
DefaultDecode::skidInsert(ThreadID tid)
Decode::skidInsert(ThreadID tid)
{
O3DynInstPtr inst = NULL;
DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -397,7 +400,7 @@ DefaultDecode::skidInsert(ThreadID tid)
}
bool
DefaultDecode::skidsEmpty()
Decode::skidsEmpty()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
@@ -412,7 +415,7 @@ DefaultDecode::skidsEmpty()
}
void
DefaultDecode::updateStatus()
Decode::updateStatus()
{
bool any_unblocking = false;
@@ -435,7 +438,7 @@ DefaultDecode::updateStatus()
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(FullO3CPU::DecodeIdx);
cpu->activateStage(CPU::DecodeIdx);
}
} else {
// If it's not unblocking, then decode will not have any internal
@@ -444,13 +447,13 @@ DefaultDecode::updateStatus()
_status = Inactive;
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::DecodeIdx);
cpu->deactivateStage(CPU::DecodeIdx);
}
}
}
void
DefaultDecode::sortInsts()
Decode::sortInsts()
{
int insts_from_fetch = fromFetch->size;
for (int i = 0; i < insts_from_fetch; ++i) {
@@ -459,7 +462,7 @@ DefaultDecode::sortInsts()
}
void
DefaultDecode::readStallSignals(ThreadID tid)
Decode::readStallSignals(ThreadID tid)
{
if (fromRename->renameBlock[tid]) {
stalls[tid].rename = true;
@@ -472,7 +475,7 @@ DefaultDecode::readStallSignals(ThreadID tid)
}
bool
DefaultDecode::checkSignalsAndUpdate(ThreadID tid)
Decode::checkSignalsAndUpdate(ThreadID tid)
{
// Check if there's a squash signal, squash if there is.
// Check stall signals, block if necessary.
@@ -528,7 +531,7 @@ DefaultDecode::checkSignalsAndUpdate(ThreadID tid)
}
void
DefaultDecode::tick()
Decode::tick()
{
wroteToTimeBuffer = false;
@@ -563,7 +566,7 @@ DefaultDecode::tick()
}
void
DefaultDecode::decode(bool &status_change, ThreadID tid)
Decode::decode(bool &status_change, ThreadID tid)
{
// If status is Running or idle,
// call decodeInsts()
@@ -607,7 +610,7 @@ DefaultDecode::decode(bool &status_change, ThreadID tid)
}
void
DefaultDecode::decodeInsts(ThreadID tid)
Decode::decodeInsts(ThreadID tid)
{
// Instructions can come either from the skid buffer or the list of
// instructions coming from fetch, depending on decode's status.
@@ -628,7 +631,7 @@ DefaultDecode::decodeInsts(ThreadID tid)
++stats.runCycles;
}
std::queue<O3DynInstPtr>
std::queue<DynInstPtr>
&insts_to_decode = decodeStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
@@ -637,7 +640,7 @@ DefaultDecode::decodeInsts(ThreadID tid)
while (insts_available > 0 && toRenameIndex < decodeWidth) {
assert(!insts_to_decode.empty());
O3DynInstPtr inst = std::move(insts_to_decode.front());
DynInstPtr inst = std::move(insts_to_decode.front());
insts_to_decode.pop();
@@ -734,3 +737,5 @@ DefaultDecode::decodeInsts(ThreadID tid)
wroteToTimeBuffer = true;
}
}
} // namespace o3

View File

@@ -49,18 +49,21 @@
#include "cpu/o3/limits.hh"
#include "cpu/timebuf.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
class FullO3CPU;
namespace o3
{
class CPU;
/**
* DefaultDecode class handles both single threaded and SMT
* Decode class handles both single threaded and SMT
* decode. Its width is specified by the parameters; each cycle it
* tries to decode that many instructions. Because instructions are
* actually decoded when the StaticInst is created, this stage does
* not do much other than check any PC-relative branches.
*/
class DefaultDecode
class Decode
{
public:
/** Overall decode stage status. Used to determine if the CPU can
@@ -88,11 +91,11 @@ class DefaultDecode
DecodeStatus _status;
/** Per-thread status. */
ThreadStatus decodeStatus[O3MaxThreads];
ThreadStatus decodeStatus[MaxThreads];
public:
/** DefaultDecode constructor. */
DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams &params);
/** Decode constructor. */
Decode(CPU *_cpu, const O3CPUParams &params);
void startupStage();
@@ -105,13 +108,13 @@ class DefaultDecode
std::string name() const;
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr);
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
/** Sets pointer to time buffer coming from fetch. */
void setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr);
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
@@ -188,7 +191,7 @@ class DefaultDecode
/** Squashes if there is a PC-relative branch that was predicted
* incorrectly. Sends squash information back to fetch.
*/
void squash(const O3DynInstPtr &inst, ThreadID tid);
void squash(const DynInstPtr &inst, ThreadID tid);
public:
/** Squashes due to commit signalling a squash. Changes status to
@@ -199,41 +202,41 @@ class DefaultDecode
private:
// Interfaces to objects outside of decode.
/** CPU interface. */
FullO3CPU *cpu;
CPU *cpu;
/** Time buffer interface. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get rename's output from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
TimeBuffer<TimeStruct>::wire fromRename;
/** Wire to get iew's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
// Might not be the best name as not only fetch will read it.
TimeBuffer<O3Comm::TimeStruct>::wire toFetch;
TimeBuffer<TimeStruct>::wire toFetch;
/** Decode instruction queue. */
TimeBuffer<O3Comm::DecodeStruct> *decodeQueue;
TimeBuffer<DecodeStruct> *decodeQueue;
/** Wire used to write any information heading to rename. */
TimeBuffer<O3Comm::DecodeStruct>::wire toRename;
TimeBuffer<DecodeStruct>::wire toRename;
/** Fetch instruction queue interface. */
TimeBuffer<O3Comm::FetchStruct> *fetchQueue;
TimeBuffer<FetchStruct> *fetchQueue;
/** Wire to get fetch's output from fetch queue. */
TimeBuffer<O3Comm::FetchStruct>::wire fromFetch;
TimeBuffer<FetchStruct>::wire fromFetch;
/** Queue of all instructions coming from fetch this cycle. */
std::queue<O3DynInstPtr> insts[O3MaxThreads];
std::queue<DynInstPtr> insts[MaxThreads];
/** Skid buffer between fetch and decode. */
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
std::queue<DynInstPtr> skidBuffer[MaxThreads];
/** Variable that tracks if decode has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
@@ -247,7 +250,7 @@ class DefaultDecode
};
/** Tracks which stages are telling decode to stall. */
Stalls stalls[O3MaxThreads];
Stalls stalls[MaxThreads];
/** Rename to decode delay. */
Cycles renameToDecodeDelay;
@@ -277,20 +280,20 @@ class DefaultDecode
unsigned skidBufferMax;
/** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/
Addr bdelayDoneSeqNum[O3MaxThreads];
Addr bdelayDoneSeqNum[MaxThreads];
/** Instruction used for squashing branch (used for MIPS)*/
O3DynInstPtr squashInst[O3MaxThreads];
DynInstPtr squashInst[MaxThreads];
/** Tells when there is a pending delay slot inst. to send
* to rename. If there is, then wait squash after the next
* instruction (used for MIPS).
*/
bool squashAfterDelaySlot[O3MaxThreads];
bool squashAfterDelaySlot[MaxThreads];
struct DecodeStats : public Stats::Group
{
DecodeStats(FullO3CPU *cpu);
DecodeStats(CPU *cpu);
/** Stat for total number of idle cycles. */
Stats::Scalar idleCycles;
@@ -317,4 +320,6 @@ class DefaultDecode
} stats;
};
} // namespace o3
#endif // __CPU_O3_DECODE_HH__

View File

@@ -43,6 +43,9 @@
#include "cpu/o3/comm.hh"
namespace o3
{
/** Node in a linked list. */
template <class DynInstPtr>
class DependencyEntry
@@ -292,4 +295,6 @@ DependencyGraph<DynInstPtr>::dump()
cprintf("memAllocCounter: %i\n", memAllocCounter);
}
} // namespace o3
#endif // __CPU_O3_DEP_GRAPH_HH__

View File

@@ -1,42 +0,0 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_DERIV_HH__
#define __CPU_O3_DERIV_HH__
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "params/DerivO3CPU.hh"
class DerivO3CPU : public FullO3CPU
{
public:
DerivO3CPU(const DerivO3CPUParams &p) : FullO3CPU(p) {}
};
#endif // __CPU_O3_DERIV_HH__

View File

@@ -46,10 +46,12 @@
#include "debug/IQ.hh"
#include "debug/O3PipeView.hh"
BaseO3DynInst::BaseO3DynInst(const StaticInstPtr &static_inst,
const StaticInstPtr &_macroop,
TheISA::PCState _pc, TheISA::PCState pred_pc,
InstSeqNum seq_num, FullO3CPU *_cpu)
namespace o3
{
DynInst::DynInst(const StaticInstPtr &static_inst,
const StaticInstPtr &_macroop, TheISA::PCState _pc,
TheISA::PCState pred_pc, InstSeqNum seq_num, CPU *_cpu)
: seqNum(seq_num), staticInst(static_inst), cpu(_cpu), pc(_pc),
regs(staticInst->numSrcRegs(), staticInst->numDestRegs()),
predPC(pred_pc), macroop(_macroop)
@@ -85,12 +87,12 @@ BaseO3DynInst::BaseO3DynInst(const StaticInstPtr &static_inst,
}
BaseO3DynInst::BaseO3DynInst(const StaticInstPtr &_staticInst,
const StaticInstPtr &_macroop)
: BaseO3DynInst(_staticInst, _macroop, {}, {}, 0, nullptr)
DynInst::DynInst(const StaticInstPtr &_staticInst,
const StaticInstPtr &_macroop)
: DynInst(_staticInst, _macroop, {}, {}, 0, nullptr)
{}
BaseO3DynInst::~BaseO3DynInst()
DynInst::~DynInst()
{
#if TRACING_ON
if (Debug::O3PipeView) {
@@ -145,7 +147,7 @@ BaseO3DynInst::~BaseO3DynInst()
#ifdef DEBUG
void
BaseO3DynInst::dumpSNList()
DynInst::dumpSNList()
{
std::set<InstSeqNum>::iterator sn_it = cpu->snList.begin();
@@ -159,7 +161,7 @@ BaseO3DynInst::dumpSNList()
#endif
void
BaseO3DynInst::dump()
DynInst::dump()
{
cprintf("T%d : %#08d `", threadNumber, pc.instAddr());
std::cout << staticInst->disassemble(pc.instAddr());
@@ -167,7 +169,7 @@ BaseO3DynInst::dump()
}
void
BaseO3DynInst::dump(std::string &outstring)
DynInst::dump(std::string &outstring)
{
std::ostringstream s;
s << "T" << threadNumber << " : 0x" << pc.instAddr() << " "
@@ -177,7 +179,7 @@ BaseO3DynInst::dump(std::string &outstring)
}
void
BaseO3DynInst::markSrcRegReady()
DynInst::markSrcRegReady()
{
DPRINTF(IQ, "[sn:%lli] has %d ready out of %d sources. RTI %d)\n",
seqNum, readyRegs+1, numSrcRegs(), readyToIssue());
@@ -187,7 +189,7 @@ BaseO3DynInst::markSrcRegReady()
}
void
BaseO3DynInst::markSrcRegReady(RegIndex src_idx)
DynInst::markSrcRegReady(RegIndex src_idx)
{
regs.readySrcIdx(src_idx, true);
markSrcRegReady();
@@ -195,7 +197,7 @@ BaseO3DynInst::markSrcRegReady(RegIndex src_idx)
void
BaseO3DynInst::setSquashed()
DynInst::setSquashed()
{
status.set(Squashed);
@@ -220,7 +222,7 @@ BaseO3DynInst::setSquashed()
}
Fault
BaseO3DynInst::execute()
DynInst::execute()
{
// @todo: Pretty convoluted way to avoid squashing from happening
// when using the TC during an instruction's execution
@@ -237,7 +239,7 @@ BaseO3DynInst::execute()
}
Fault
BaseO3DynInst::initiateAcc()
DynInst::initiateAcc()
{
// @todo: Pretty convoluted way to avoid squashing from happening
// when using the TC during an instruction's execution
@@ -254,7 +256,7 @@ BaseO3DynInst::initiateAcc()
}
Fault
BaseO3DynInst::completeAcc(PacketPtr pkt)
DynInst::completeAcc(PacketPtr pkt)
{
// @todo: Pretty convoluted way to avoid squashing from happening
// when using the TC during an instruction's execution
@@ -277,44 +279,44 @@ BaseO3DynInst::completeAcc(PacketPtr pkt)
}
void
BaseO3DynInst::trap(const Fault &fault)
DynInst::trap(const Fault &fault)
{
this->cpu->trap(fault, this->threadNumber, this->staticInst);
}
Fault
BaseO3DynInst::initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
DynInst::initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
const std::vector<bool> &byte_enable)
{
assert(byte_enable.size() == size);
return cpu->pushRequest(
dynamic_cast<O3DynInstPtr::PtrType>(this),
dynamic_cast<DynInstPtr::PtrType>(this),
/* ld */ true, nullptr, size, addr, flags, nullptr, nullptr,
byte_enable);
}
Fault
BaseO3DynInst::initiateHtmCmd(Request::Flags flags)
DynInst::initiateHtmCmd(Request::Flags flags)
{
return cpu->pushRequest(
dynamic_cast<O3DynInstPtr::PtrType>(this),
dynamic_cast<DynInstPtr::PtrType>(this),
/* ld */ true, nullptr, 8, 0x0ul, flags, nullptr, nullptr);
}
Fault
BaseO3DynInst::writeMem(uint8_t *data, unsigned size, Addr addr,
DynInst::writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res,
const std::vector<bool> &byte_enable)
{
assert(byte_enable.size() == size);
return cpu->pushRequest(
dynamic_cast<O3DynInstPtr::PtrType>(this),
dynamic_cast<DynInstPtr::PtrType>(this),
/* st */ false, data, size, addr, flags, res, nullptr,
byte_enable);
}
Fault
BaseO3DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
AtomicOpFunctorPtr amo_op)
{
// atomic memory instructions do not have data to be written to memory yet
@@ -323,7 +325,9 @@ BaseO3DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
// Atomic memory requests need to carry their `amo_op` fields to cache/
// memory
return cpu->pushRequest(
dynamic_cast<O3DynInstPtr::PtrType>(this),
dynamic_cast<DynInstPtr::PtrType>(this),
/* atomic */ false, nullptr, size, addr, flags, nullptr,
std::move(amo_op), std::vector<bool>(size, true));
}
} // namespace o3

View File

@@ -68,22 +68,24 @@
class Packet;
class BaseO3DynInst : public ExecContext, public RefCounted
namespace o3
{
class DynInst : public ExecContext, public RefCounted
{
public:
// The list of instructions iterator type.
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
typedef typename std::list<DynInstPtr>::iterator ListIt;
/** BaseDynInst constructor given a binary instruction. */
BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
&macroop, TheISA::PCState pc, TheISA::PCState predPC,
InstSeqNum seq_num, FullO3CPU *cpu);
InstSeqNum seq_num, CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
BaseO3DynInst(const StaticInstPtr &_staticInst,
const StaticInstPtr &_macroop);
DynInst(const StaticInstPtr &_staticInst, const StaticInstPtr &_macroop);
~BaseO3DynInst();
~DynInst();
/** Executes the instruction.*/
Fault execute();
@@ -101,12 +103,12 @@ class BaseO3DynInst : public ExecContext, public RefCounted
const StaticInstPtr staticInst;
/** Pointer to the Impl's CPU object. */
FullO3CPU *cpu = nullptr;
CPU *cpu = nullptr;
BaseCPU *getCpuPtr() { return cpu; }
/** Pointer to the thread state. */
O3ThreadState *thread = nullptr;
ThreadState *thread = nullptr;
/** The kind of fault this instruction has generated. */
Fault fault = NoFault;
@@ -1014,10 +1016,10 @@ class BaseO3DynInst : public ExecContext, public RefCounted
void setTid(ThreadID tid) { threadNumber = tid; }
/** Sets the pointer to the thread state. */
void setThreadState(O3ThreadState *state) { thread = state; }
void setThreadState(ThreadState *state) { thread = state; }
/** Returns the thread context. */
ThreadContext *tcBase() const override { return thread->getTC(); }
::ThreadContext *tcBase() const override { return thread->getTC(); }
public:
/** Is this instruction's memory access strictly ordered? */
@@ -1063,7 +1065,7 @@ class BaseO3DynInst : public ExecContext, public RefCounted
return cpu->mwait(threadNumber, pkt);
}
void
mwaitAtomic(ThreadContext *tc) override
mwaitAtomic(::ThreadContext *tc) override
{
return cpu->mwaitAtomic(threadNumber, tc, cpu->mmu);
}
@@ -1322,4 +1324,6 @@ class BaseO3DynInst : public ExecContext, public RefCounted
}
};
} // namespace o3
#endif // __CPU_O3_DYN_INST_HH__

View File

@@ -44,9 +44,14 @@
#include "base/refcnt.hh"
class BaseO3DynInst;
namespace o3
{
using O3DynInstPtr = RefCountingPtr<BaseO3DynInst>;
using O3DynInstConstPtr = RefCountingPtr<const BaseO3DynInst>;
class DynInst;
using DynInstPtr = RefCountingPtr<DynInst>;
using DynInstConstPtr = RefCountingPtr<const DynInst>;
} // namespace o3
#endif // __CPU_O3_DYN_INST_PTR_HH__

View File

@@ -63,19 +63,22 @@
#include "debug/O3CPU.hh"
#include "debug/O3PipeView.hh"
#include "mem/packet.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
#include "sim/byteswap.hh"
#include "sim/core.hh"
#include "sim/eventq.hh"
#include "sim/full_system.hh"
#include "sim/system.hh"
DefaultFetch::IcachePort::IcachePort(DefaultFetch *_fetch, FullO3CPU *_cpu) :
namespace o3
{
Fetch::IcachePort::IcachePort(Fetch *_fetch, CPU *_cpu) :
RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch)
{}
DefaultFetch::DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams &params)
Fetch::Fetch(CPU *_cpu, const O3CPUParams &params)
: fetchPolicy(params.smtFetchPolicy),
cpu(_cpu),
branchPred(nullptr),
@@ -96,14 +99,14 @@ DefaultFetch::DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams &params)
icachePort(this, _cpu),
finishTranslationEvent(this), fetchStats(_cpu, this)
{
if (numThreads > O3MaxThreads)
if (numThreads > MaxThreads)
fatal("numThreads (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxThreads in src/cpu/o3/limits.hh\n",
numThreads, static_cast<int>(O3MaxThreads));
if (fetchWidth > O3MaxWidth)
"\tincrease MaxThreads in src/cpu/o3/limits.hh\n",
numThreads, static_cast<int>(MaxThreads));
if (fetchWidth > MaxWidth)
fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
fetchWidth, static_cast<int>(O3MaxWidth));
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
fetchWidth, static_cast<int>(MaxWidth));
if (fetchBufferSize > cacheBlkSize)
fatal("fetch buffer size (%u bytes) is greater than the cache "
"block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
@@ -114,7 +117,7 @@ DefaultFetch::DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams &params)
// Get the size of an instruction.
instSize = sizeof(TheISA::MachInst);
for (int i = 0; i < O3MaxThreads; i++) {
for (int i = 0; i < MaxThreads; i++) {
fetchStatus[i] = Idle;
decoder[i] = nullptr;
pc[i] = 0;
@@ -141,19 +144,18 @@ DefaultFetch::DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
}
std::string DefaultFetch::name() const { return cpu->name() + ".fetch"; }
std::string Fetch::name() const { return cpu->name() + ".fetch"; }
void
DefaultFetch::regProbePoints()
Fetch::regProbePoints()
{
ppFetch = new ProbePointArg<O3DynInstPtr>(cpu->getProbeManager(), "Fetch");
ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
"FetchRequest");
}
DefaultFetch::FetchStatGroup::FetchStatGroup(
FullO3CPU *cpu, DefaultFetch *fetch)
Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
: Stats::Group(cpu, "fetch"),
ADD_STAT(icacheStallCycles, Stats::Units::Cycle::get(),
"Number of cycles fetch is stalled on an Icache miss"),
@@ -255,7 +257,7 @@ DefaultFetch::FetchStatGroup::FetchStatGroup(
.flags(Stats::total);
}
void
DefaultFetch::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *time_buffer)
Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
timeBuffer = time_buffer;
@@ -267,20 +269,20 @@ DefaultFetch::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *time_buffer)
}
void
DefaultFetch::setActiveThreads(std::list<ThreadID> *at_ptr)
Fetch::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
void
DefaultFetch::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *ftb_ptr)
Fetch::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)
{
// Create wire to write information to proper place in fetch time buf.
toDecode = ftb_ptr->getWire(0);
}
void
DefaultFetch::startupStage()
Fetch::startupStage()
{
assert(priorityList.empty());
resetStage();
@@ -291,7 +293,7 @@ DefaultFetch::startupStage()
}
void
DefaultFetch::clearStates(ThreadID tid)
Fetch::clearStates(ThreadID tid)
{
fetchStatus[tid] = Running;
pc[tid] = cpu->pcState(tid);
@@ -310,7 +312,7 @@ DefaultFetch::clearStates(ThreadID tid)
}
void
DefaultFetch::resetStage()
Fetch::resetStage()
{
numInst = 0;
interruptPending = false;
@@ -344,7 +346,7 @@ DefaultFetch::resetStage()
}
void
DefaultFetch::processCacheCompletion(PacketPtr pkt)
Fetch::processCacheCompletion(PacketPtr pkt)
{
ThreadID tid = cpu->contextToThread(pkt->req->contextId());
@@ -387,7 +389,7 @@ DefaultFetch::processCacheCompletion(PacketPtr pkt)
}
void
DefaultFetch::drainResume()
Fetch::drainResume()
{
for (ThreadID i = 0; i < numThreads; ++i) {
stalls[i].decode = false;
@@ -396,7 +398,7 @@ DefaultFetch::drainResume()
}
void
DefaultFetch::drainSanityCheck() const
Fetch::drainSanityCheck() const
{
assert(isDrained());
assert(retryPkt == NULL);
@@ -413,7 +415,7 @@ DefaultFetch::drainSanityCheck() const
}
bool
DefaultFetch::isDrained() const
Fetch::isDrained() const
{
/* Make sure that threads are either idle or that the commit stage
* has signaled that draining has completed by setting the drain
@@ -443,7 +445,7 @@ DefaultFetch::isDrained() const
}
void
DefaultFetch::takeOverFrom()
Fetch::takeOverFrom()
{
assert(cpu->getInstPort().isConnected());
resetStage();
@@ -451,7 +453,7 @@ DefaultFetch::takeOverFrom()
}
void
DefaultFetch::drainStall(ThreadID tid)
Fetch::drainStall(ThreadID tid)
{
assert(cpu->isDraining());
assert(!stalls[tid].drain);
@@ -460,7 +462,7 @@ DefaultFetch::drainStall(ThreadID tid)
}
void
DefaultFetch::wakeFromQuiesce()
Fetch::wakeFromQuiesce()
{
DPRINTF(Fetch, "Waking up from quiesce\n");
// Hopefully this is safe
@@ -469,31 +471,31 @@ DefaultFetch::wakeFromQuiesce()
}
void
DefaultFetch::switchToActive()
Fetch::switchToActive()
{
if (_status == Inactive) {
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(FullO3CPU::FetchIdx);
cpu->activateStage(CPU::FetchIdx);
_status = Active;
}
}
void
DefaultFetch::switchToInactive()
Fetch::switchToInactive()
{
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::FetchIdx);
cpu->deactivateStage(CPU::FetchIdx);
_status = Inactive;
}
}
void
DefaultFetch::deactivateThread(ThreadID tid)
Fetch::deactivateThread(ThreadID tid)
{
// Update priority list
auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
@@ -503,8 +505,7 @@ DefaultFetch::deactivateThread(ThreadID tid)
}
bool
DefaultFetch::lookupAndUpdateNextPC(const O3DynInstPtr &inst,
TheISA::PCState &nextPC)
Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &nextPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
@@ -548,7 +549,7 @@ DefaultFetch::lookupAndUpdateNextPC(const O3DynInstPtr &inst,
}
bool
DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
Fetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
{
Fault fault = NoFault;
@@ -597,7 +598,7 @@ DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
}
void
DefaultFetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
Fetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
{
ThreadID tid = cpu->contextToThread(mem_req->contextId());
Addr fetchBufferBlockPC = mem_req->getVaddr();
@@ -683,7 +684,7 @@ DefaultFetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
// We will use a nop in order to carry the fault.
O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
fetchPC, fetchPC, false);
instruction->setNotAnInst();
@@ -704,8 +705,8 @@ DefaultFetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
}
void
DefaultFetch::doSquash(const TheISA::PCState &newPC,
const O3DynInstPtr squashInst, ThreadID tid)
Fetch::doSquash(const TheISA::PCState &newPC, const DynInstPtr squashInst,
ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
tid, newPC);
@@ -755,8 +756,8 @@ DefaultFetch::doSquash(const TheISA::PCState &newPC,
}
void
DefaultFetch::squashFromDecode(const TheISA::PCState &newPC,
const O3DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
Fetch::squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
@@ -768,7 +769,7 @@ DefaultFetch::squashFromDecode(const TheISA::PCState &newPC,
}
bool
DefaultFetch::checkStall(ThreadID tid) const
Fetch::checkStall(ThreadID tid) const
{
bool ret_val = false;
@@ -781,8 +782,8 @@ DefaultFetch::checkStall(ThreadID tid) const
return ret_val;
}
DefaultFetch::FetchStatus
DefaultFetch::updateFetchStatus()
Fetch::FetchStatus
Fetch::updateFetchStatus()
{
//Check Running
std::list<ThreadID>::iterator threads = activeThreads->begin();
@@ -803,7 +804,7 @@ DefaultFetch::updateFetchStatus()
"completion\n",tid);
}
cpu->activateStage(FullO3CPU::FetchIdx);
cpu->activateStage(CPU::FetchIdx);
}
return Active;
@@ -814,15 +815,15 @@ DefaultFetch::updateFetchStatus()
if (_status == Active) {
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::FetchIdx);
cpu->deactivateStage(CPU::FetchIdx);
}
return Inactive;
}
void
DefaultFetch::squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
O3DynInstPtr squashInst, ThreadID tid)
Fetch::squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
DynInstPtr squashInst, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
@@ -833,7 +834,7 @@ DefaultFetch::squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
}
void
DefaultFetch::tick()
Fetch::tick()
{
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
@@ -935,7 +936,7 @@ DefaultFetch::tick()
}
bool
DefaultFetch::checkSignalsAndUpdate(ThreadID tid)
Fetch::checkSignalsAndUpdate(ThreadID tid)
{
// Update the per thread stall statuses.
if (fromDecode->decodeBlock[tid]) {
@@ -1038,8 +1039,8 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid)
return false;
}
O3DynInstPtr
DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
DynInstPtr
Fetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace)
{
@@ -1047,8 +1048,8 @@ DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
InstSeqNum seq = cpu->getAndIncrementInstSeq();
// Create a new DynInst from the instruction fetched.
O3DynInstPtr instruction =
new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
DynInstPtr instruction =
new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
instruction->setTid(tid);
instruction->setThreadState(cpu->thread[tid]);
@@ -1090,7 +1091,7 @@ DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
}
void
DefaultFetch::fetch(bool &status_change)
Fetch::fetch(bool &status_change)
{
//////////////////////////////////////////
// Start actual fetch
@@ -1265,9 +1266,8 @@ DefaultFetch::fetch(bool &status_change)
newMacro |= staticInst->isLastMicroop();
}
O3DynInstPtr instruction =
buildInst(tid, staticInst, curMacroop,
thisPC, nextPC, true);
DynInstPtr instruction =
buildInst(tid, staticInst, curMacroop, thisPC, nextPC, true);
ppFetch->notify(instruction);
numInst++;
@@ -1352,7 +1352,7 @@ DefaultFetch::fetch(bool &status_change)
}
void
DefaultFetch::recvReqRetry()
Fetch::recvReqRetry()
{
if (retryPkt != NULL) {
assert(cacheBlocked);
@@ -1382,7 +1382,7 @@ DefaultFetch::recvReqRetry()
// //
///////////////////////////////////////
ThreadID
DefaultFetch::getFetchingThread()
Fetch::getFetchingThread()
{
if (numThreads > 1) {
switch (fetchPolicy) {
@@ -1417,7 +1417,7 @@ DefaultFetch::getFetchingThread()
ThreadID
DefaultFetch::roundRobin()
Fetch::roundRobin()
{
std::list<ThreadID>::iterator pri_iter = priorityList.begin();
std::list<ThreadID>::iterator end = priorityList.end();
@@ -1446,7 +1446,7 @@ DefaultFetch::roundRobin()
}
ThreadID
DefaultFetch::iqCount()
Fetch::iqCount()
{
//sorted from lowest->highest
std::priority_queue<unsigned, std::vector<unsigned>,
@@ -1482,7 +1482,7 @@ DefaultFetch::iqCount()
}
ThreadID
DefaultFetch::lsqCount()
Fetch::lsqCount()
{
//sorted from lowest->highest
std::priority_queue<unsigned, std::vector<unsigned>,
@@ -1517,14 +1517,14 @@ DefaultFetch::lsqCount()
}
ThreadID
DefaultFetch::branchCount()
Fetch::branchCount()
{
panic("Branch Count Fetch policy unimplemented\n");
return InvalidThreadID;
}
void
DefaultFetch::pipelineIcacheAccesses(ThreadID tid)
Fetch::pipelineIcacheAccesses(ThreadID tid)
{
if (!issuePipelinedIfetch[tid]) {
return;
@@ -1553,7 +1553,7 @@ DefaultFetch::pipelineIcacheAccesses(ThreadID tid)
}
void
DefaultFetch::profileStall(ThreadID tid)
Fetch::profileStall(ThreadID tid)
{
DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
@@ -1602,7 +1602,7 @@ DefaultFetch::profileStall(ThreadID tid)
}
bool
DefaultFetch::IcachePort::recvTimingResp(PacketPtr pkt)
Fetch::IcachePort::recvTimingResp(PacketPtr pkt)
{
DPRINTF(O3CPU, "Fetch unit received timing\n");
// We shouldn't ever get a cacheable block in Modified state
@@ -1614,7 +1614,9 @@ DefaultFetch::IcachePort::recvTimingResp(PacketPtr pkt)
}
void
DefaultFetch::IcachePort::recvReqRetry()
Fetch::IcachePort::recvReqRetry()
{
fetch->recvReqRetry();
}
} // namespace o3

View File

@@ -57,18 +57,22 @@
#include "sim/eventq.hh"
#include "sim/probe/probe.hh"
struct DerivO3CPUParams;
class FullO3CPU;
struct O3CPUParams;
namespace o3
{
class CPU;
/**
* DefaultFetch class handles both single threaded and SMT fetch. Its
* Fetch class handles both single threaded and SMT fetch. Its
* width is specified by the parameters; each cycle it tries to fetch
* that many instructions. It supports using a branch predictor to
* predict direction and targets.
* It supports the idling functionality of the CPU by indicating to
* the CPU when it is active and inactive.
*/
class DefaultFetch
class Fetch
{
public:
/**
@@ -78,11 +82,11 @@ class DefaultFetch
{
protected:
/** Pointer to fetch. */
DefaultFetch *fetch;
Fetch *fetch;
public:
/** Default constructor. */
IcachePort(DefaultFetch *_fetch, FullO3CPU *_cpu);
IcachePort(Fetch *_fetch, CPU *_cpu);
protected:
@@ -97,15 +101,15 @@ class DefaultFetch
class FetchTranslation : public BaseTLB::Translation
{
protected:
DefaultFetch *fetch;
Fetch *fetch;
public:
FetchTranslation(DefaultFetch *_fetch) : fetch(_fetch) {}
FetchTranslation(Fetch *_fetch) : fetch(_fetch) {}
void markDelayed() {}
void
finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc,
finish(const Fault &fault, const RequestPtr &req, ::ThreadContext *tc,
BaseTLB::Mode mode)
{
assert(mode == BaseTLB::Execute);
@@ -121,12 +125,12 @@ class DefaultFetch
class FinishTranslationEvent : public Event
{
private:
DefaultFetch *fetch;
Fetch *fetch;
Fault fault;
RequestPtr req;
public:
FinishTranslationEvent(DefaultFetch *_fetch)
FinishTranslationEvent(Fetch *_fetch)
: fetch(_fetch), req(nullptr)
{}
@@ -144,7 +148,7 @@ class DefaultFetch
const char *
description() const
{
return "FullO3CPU FetchFinishTranslation";
return "CPU FetchFinishTranslation";
}
};
@@ -180,7 +184,7 @@ class DefaultFetch
FetchStatus _status;
/** Per-thread status. */
ThreadStatus fetchStatus[O3MaxThreads];
ThreadStatus fetchStatus[MaxThreads];
/** Fetch policy. */
SMTFetchPolicy fetchPolicy;
@@ -189,13 +193,13 @@ class DefaultFetch
std::list<ThreadID> priorityList;
/** Probe points. */
ProbePointArg<O3DynInstPtr> *ppFetch;
ProbePointArg<DynInstPtr> *ppFetch;
/** To probe when a fetch request is successfully sent. */
ProbePointArg<RequestPtr> *ppFetchRequestSent;
public:
/** DefaultFetch constructor. */
DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams &params);
/** Fetch constructor. */
Fetch(CPU *_cpu, const O3CPUParams &params);
/** Returns the name of fetch. */
std::string name() const;
@@ -205,13 +209,13 @@ class DefaultFetch
void regProbePoints();
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *time_buffer);
void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr);
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Initialize stage. */
void startupStage();
@@ -276,7 +280,7 @@ class DefaultFetch
* @param next_NPC Used for ISAs which use delay slots.
* @return Whether or not a branch was predicted as taken.
*/
bool lookupAndUpdateNextPC(const O3DynInstPtr &inst, TheISA::PCState &pc);
bool lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &pc);
/**
* Fetches the cache line that contains the fetch PC. Returns any
@@ -299,14 +303,14 @@ class DefaultFetch
/** Squashes a specific thread and resets the PC. */
void doSquash(const TheISA::PCState &newPC,
const O3DynInstPtr squashInst, ThreadID tid);
const DynInstPtr squashInst, ThreadID tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode
* that should be squashed.
*/
void squashFromDecode(const TheISA::PCState &newPC,
const O3DynInstPtr squashInst,
const DynInstPtr squashInst,
const InstSeqNum seq_num, ThreadID tid);
/** Checks if a thread is stalled. */
@@ -322,7 +326,7 @@ class DefaultFetch
* squash should be the commit stage.
*/
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
O3DynInstPtr squashInst, ThreadID tid);
DynInstPtr squashInst, ThreadID tid);
/** Ticks the fetch stage, processing all input signals and fetching
* as many instructions as possible.
@@ -348,14 +352,14 @@ class DefaultFetch
}
/** The decoder. */
TheISA::Decoder *decoder[O3MaxThreads];
TheISA::Decoder *decoder[MaxThreads];
RequestPort &getInstPort() { return icachePort; }
private:
O3DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace);
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace);
/** Returns the appropriate thread to fetch, given the fetch policy. */
ThreadID getFetchingThread();
@@ -381,41 +385,41 @@ class DefaultFetch
private:
/** Pointer to the O3CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Time buffer interface. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get decode's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromDecode;
TimeBuffer<TimeStruct>::wire fromDecode;
/** Wire to get rename's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
TimeBuffer<TimeStruct>::wire fromRename;
/** Wire to get iew's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's information from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
TimeBuffer<TimeStruct>::wire fromCommit;
//Might be annoying how this name is different than the queue.
/** Wire used to write any information heading to decode. */
TimeBuffer<O3Comm::FetchStruct>::wire toDecode;
TimeBuffer<FetchStruct>::wire toDecode;
/** BPredUnit. */
BPredUnit *branchPred;
TheISA::PCState pc[O3MaxThreads];
TheISA::PCState pc[MaxThreads];
Addr fetchOffset[O3MaxThreads];
Addr fetchOffset[MaxThreads];
StaticInstPtr macroop[O3MaxThreads];
StaticInstPtr macroop[MaxThreads];
/** Can the fetch stage redirect from an interrupt on this instruction? */
bool delayedCommit[O3MaxThreads];
bool delayedCommit[MaxThreads];
/** Memory request used to access cache. */
RequestPtr memReq[O3MaxThreads];
RequestPtr memReq[MaxThreads];
/** Variable that tracks if fetch has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
@@ -433,7 +437,7 @@ class DefaultFetch
};
/** Tracks which stages are telling fetch to stall. */
Stalls stalls[O3MaxThreads];
Stalls stalls[MaxThreads];
/** Decode to fetch delay. */
Cycles decodeToFetchDelay;
@@ -474,25 +478,25 @@ class DefaultFetch
Addr fetchBufferMask;
/** The fetch data that is being fetched and buffered. */
uint8_t *fetchBuffer[O3MaxThreads];
uint8_t *fetchBuffer[MaxThreads];
/** The PC of the first instruction loaded into the fetch buffer. */
Addr fetchBufferPC[O3MaxThreads];
Addr fetchBufferPC[MaxThreads];
/** The size of the fetch queue in micro-ops */
unsigned fetchQueueSize;
/** Queue of fetched instructions. Per-thread to prevent HoL blocking. */
std::deque<O3DynInstPtr> fetchQueue[O3MaxThreads];
std::deque<DynInstPtr> fetchQueue[MaxThreads];
/** Whether or not the fetch buffer data is valid. */
bool fetchBufferValid[O3MaxThreads];
bool fetchBufferValid[MaxThreads];
/** Size of instructions. */
int instSize;
/** Icache stall statistics. */
Counter lastIcacheStall[O3MaxThreads];
Counter lastIcacheStall[MaxThreads];
/** List of Active Threads */
std::list<ThreadID> *activeThreads;
@@ -515,7 +519,7 @@ class DefaultFetch
IcachePort icachePort;
/** Set to true if a pipelined I-cache request should be issued. */
bool issuePipelinedIfetch[O3MaxThreads];
bool issuePipelinedIfetch[MaxThreads];
/** Event used to delay fault generation of translation faults */
FinishTranslationEvent finishTranslationEvent;
@@ -523,7 +527,7 @@ class DefaultFetch
protected:
struct FetchStatGroup : public Stats::Group
{
FetchStatGroup(FullO3CPU *cpu, DefaultFetch *fetch);
FetchStatGroup(CPU *cpu, Fetch *fetch);
// @todo: Consider making these
// vectors and tracking on a per thread basis.
/** Stat for total number of cycles stalled due to an icache miss. */
@@ -581,4 +585,6 @@ class DefaultFetch
} fetchStats;
};
} // namespace o3
#endif //__CPU_O3_FETCH_HH__

View File

@@ -32,6 +32,9 @@
#include "base/trace.hh"
#include "debug/FreeList.hh"
namespace o3
{
UnifiedFreeList::UnifiedFreeList(const std::string &_my_name,
PhysRegFile *_regFile)
: _name(_my_name), regFile(_regFile)
@@ -42,3 +45,5 @@ UnifiedFreeList::UnifiedFreeList(const std::string &_my_name,
// about its internal organization
regFile->initFreeList(this);
}
} // namespace o3

View File

@@ -51,6 +51,11 @@
#include "cpu/o3/regfile.hh"
#include "debug/FreeList.hh"
namespace o3
{
class UnifiedRenameMap;
/**
* Free list for a single class of registers (e.g., integer
* or floating point). Because the register class is implicitly
@@ -335,5 +340,6 @@ UnifiedFreeList::addReg(PhysRegIdPtr freed_reg)
// assert(freeFloatRegs.size() <= numPhysicalFloatRegs);
}
} // namespace o3
#endif // __CPU_O3_FREE_LIST_HH__

View File

@@ -44,6 +44,9 @@
#include "cpu/func_unit.hh"
namespace o3
{
////////////////////////////////////////////////////////////////////////////
//
// A pool of function units
@@ -243,3 +246,5 @@ FUPool::isDrained() const
return is_drained;
}
} // namespace o3

View File

@@ -54,6 +54,9 @@
class FUDesc;
class FuncUnit;
namespace o3
{
/**
* Pool of FU's, specific to the new CPU model. The old FU pool had lists of
* free units and busy units, and whenever a FU was needed it would iterate
@@ -176,4 +179,6 @@ class FUPool : public SimObject
void takeOverFrom() {};
};
} // namespace o3
#endif // __CPU_O3_FU_POOL_HH__

View File

@@ -57,9 +57,12 @@
#include "debug/Drain.hh"
#include "debug/IEW.hh"
#include "debug/O3PipeView.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
DefaultIEW::DefaultIEW(FullO3CPU *_cpu, const DerivO3CPUParams &params)
namespace o3
{
IEW::IEW(CPU *_cpu, const O3CPUParams &params)
: issueToExecQueue(params.backComSize, params.forwardComSize),
cpu(_cpu),
instQueue(_cpu, this, params),
@@ -76,18 +79,18 @@ DefaultIEW::DefaultIEW(FullO3CPU *_cpu, const DerivO3CPUParams &params)
numThreads(params.numThreads),
iewStats(cpu)
{
if (dispatchWidth > O3MaxWidth)
if (dispatchWidth > MaxWidth)
fatal("dispatchWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
dispatchWidth, static_cast<int>(O3MaxWidth));
if (issueWidth > O3MaxWidth)
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
dispatchWidth, static_cast<int>(MaxWidth));
if (issueWidth > MaxWidth)
fatal("issueWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
issueWidth, static_cast<int>(O3MaxWidth));
if (wbWidth > O3MaxWidth)
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
issueWidth, static_cast<int>(MaxWidth));
if (wbWidth > MaxWidth)
fatal("wbWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
wbWidth, static_cast<int>(O3MaxWidth));
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
wbWidth, static_cast<int>(MaxWidth));
_status = Active;
exeStatus = Running;
@@ -99,7 +102,7 @@ DefaultIEW::DefaultIEW(FullO3CPU *_cpu, const DerivO3CPUParams &params)
// Instruction queue needs the queue between issue and execute.
instQueue.setIssueToExecuteQueue(&issueToExecQueue);
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
dispatchStatus[tid] = Running;
fetchRedirect[tid] = false;
}
@@ -110,33 +113,33 @@ DefaultIEW::DefaultIEW(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
std::string
DefaultIEW::name() const
IEW::name() const
{
return cpu->name() + ".iew";
}
void
DefaultIEW::regProbePoints()
IEW::regProbePoints()
{
ppDispatch = new ProbePointArg<O3DynInstPtr>(
ppDispatch = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Dispatch");
ppMispredict = new ProbePointArg<O3DynInstPtr>(
ppMispredict = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Mispredict");
/**
* Probe point with dynamic instruction as the argument used to probe when
* an instruction starts to execute.
*/
ppExecute = new ProbePointArg<O3DynInstPtr>(
ppExecute = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Execute");
/**
* Probe point with dynamic instruction as the argument used to probe when
* an instruction execution completes and it is marked ready to commit.
*/
ppToCommit = new ProbePointArg<O3DynInstPtr>(
ppToCommit = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "ToCommit");
}
DefaultIEW::IEWStats::IEWStats(FullO3CPU *cpu)
IEW::IEWStats::IEWStats(CPU *cpu)
: Stats::Group(cpu),
ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
"Number of cycles IEW is idle"),
@@ -210,7 +213,7 @@ DefaultIEW::IEWStats::IEWStats(FullO3CPU *cpu)
wbFanout = producerInst / consumerInst;
}
DefaultIEW::IEWStats::ExecutedInstStats::ExecutedInstStats(FullO3CPU *cpu)
IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu)
: Stats::Group(cpu),
ADD_STAT(numInsts, Stats::Units::Count::get(),
"Number of executed instructions"),
@@ -261,7 +264,7 @@ DefaultIEW::IEWStats::ExecutedInstStats::ExecutedInstStats(FullO3CPU *cpu)
}
void
DefaultIEW::startupStage()
IEW::startupStage()
{
for (ThreadID tid = 0; tid < numThreads; tid++) {
toRename->iewInfo[tid].usedIQ = true;
@@ -280,11 +283,11 @@ DefaultIEW::startupStage()
cpu->checker->setDcachePort(&ldstQueue.getDataPort());
}
cpu->activateStage(FullO3CPU::IEWIdx);
cpu->activateStage(CPU::IEWIdx);
}
void
DefaultIEW::clearStates(ThreadID tid)
IEW::clearStates(ThreadID tid)
{
toRename->iewInfo[tid].usedIQ = true;
toRename->iewInfo[tid].freeIQEntries =
@@ -296,7 +299,7 @@ DefaultIEW::clearStates(ThreadID tid)
}
void
DefaultIEW::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
IEW::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
@@ -313,7 +316,7 @@ DefaultIEW::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
}
void
DefaultIEW::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
IEW::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
renameQueue = rq_ptr;
@@ -322,7 +325,7 @@ DefaultIEW::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
}
void
DefaultIEW::setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr)
IEW::setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr)
{
iewQueue = iq_ptr;
@@ -331,7 +334,7 @@ DefaultIEW::setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr)
}
void
DefaultIEW::setActiveThreads(std::list<ThreadID> *at_ptr)
IEW::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
@@ -340,13 +343,13 @@ DefaultIEW::setActiveThreads(std::list<ThreadID> *at_ptr)
}
void
DefaultIEW::setScoreboard(Scoreboard *sb_ptr)
IEW::setScoreboard(Scoreboard *sb_ptr)
{
scoreboard = sb_ptr;
}
bool
DefaultIEW::isDrained() const
IEW::isDrained() const
{
bool drained = ldstQueue.isDrained() && instQueue.isDrained();
@@ -374,7 +377,7 @@ DefaultIEW::isDrained() const
}
void
DefaultIEW::drainSanityCheck() const
IEW::drainSanityCheck() const
{
assert(isDrained());
@@ -383,7 +386,7 @@ DefaultIEW::drainSanityCheck() const
}
void
DefaultIEW::takeOverFrom()
IEW::takeOverFrom()
{
// Reset all state.
_status = Active;
@@ -410,7 +413,7 @@ DefaultIEW::takeOverFrom()
}
void
DefaultIEW::squash(ThreadID tid)
IEW::squash(ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Squashing all instructions.\n", tid);
@@ -445,7 +448,7 @@ DefaultIEW::squash(ThreadID tid)
}
void
DefaultIEW::squashDueToBranch(const O3DynInstPtr& inst, ThreadID tid)
IEW::squashDueToBranch(const DynInstPtr& inst, ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] [sn:%llu] Squashing from a specific instruction,"
" PC: %s "
@@ -470,7 +473,7 @@ DefaultIEW::squashDueToBranch(const O3DynInstPtr& inst, ThreadID tid)
}
void
DefaultIEW::squashDueToMemOrder(const O3DynInstPtr& inst, ThreadID tid)
IEW::squashDueToMemOrder(const DynInstPtr& inst, ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Memory violation, squashing violator and younger "
"insts, PC: %s [sn:%llu].\n", tid, inst->pcState(), inst->seqNum);
@@ -496,7 +499,7 @@ DefaultIEW::squashDueToMemOrder(const O3DynInstPtr& inst, ThreadID tid)
}
void
DefaultIEW::block(ThreadID tid)
IEW::block(ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Blocking.\n", tid);
@@ -514,7 +517,7 @@ DefaultIEW::block(ThreadID tid)
}
void
DefaultIEW::unblock(ThreadID tid)
IEW::unblock(ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Reading instructions out of the skid "
"buffer %u.\n",tid, tid);
@@ -530,37 +533,37 @@ DefaultIEW::unblock(ThreadID tid)
}
void
DefaultIEW::wakeDependents(const O3DynInstPtr& inst)
IEW::wakeDependents(const DynInstPtr& inst)
{
instQueue.wakeDependents(inst);
}
void
DefaultIEW::rescheduleMemInst(const O3DynInstPtr& inst)
IEW::rescheduleMemInst(const DynInstPtr& inst)
{
instQueue.rescheduleMemInst(inst);
}
void
DefaultIEW::replayMemInst(const O3DynInstPtr& inst)
IEW::replayMemInst(const DynInstPtr& inst)
{
instQueue.replayMemInst(inst);
}
void
DefaultIEW::blockMemInst(const O3DynInstPtr& inst)
IEW::blockMemInst(const DynInstPtr& inst)
{
instQueue.blockMemInst(inst);
}
void
DefaultIEW::cacheUnblocked()
IEW::cacheUnblocked()
{
instQueue.cacheUnblocked();
}
void
DefaultIEW::instToCommit(const O3DynInstPtr& inst)
IEW::instToCommit(const DynInstPtr& inst)
{
// This function should not be called after writebackInsts in a
// single cycle. That will cause problems with an instruction
@@ -588,7 +591,7 @@ DefaultIEW::instToCommit(const O3DynInstPtr& inst)
}
unsigned
DefaultIEW::validInstsFromRename()
IEW::validInstsFromRename()
{
unsigned inst_count = 0;
@@ -601,9 +604,9 @@ DefaultIEW::validInstsFromRename()
}
void
DefaultIEW::skidInsert(ThreadID tid)
IEW::skidInsert(ThreadID tid)
{
O3DynInstPtr inst = NULL;
DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -622,7 +625,7 @@ DefaultIEW::skidInsert(ThreadID tid)
}
int
DefaultIEW::skidCount()
IEW::skidCount()
{
int max=0;
@@ -640,7 +643,7 @@ DefaultIEW::skidCount()
}
bool
DefaultIEW::skidsEmpty()
IEW::skidsEmpty()
{
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
@@ -656,7 +659,7 @@ DefaultIEW::skidsEmpty()
}
void
DefaultIEW::updateStatus()
IEW::updateStatus()
{
bool any_unblocking = false;
@@ -696,7 +699,7 @@ DefaultIEW::updateStatus()
}
bool
DefaultIEW::checkStall(ThreadID tid)
IEW::checkStall(ThreadID tid)
{
bool ret_val(false);
@@ -712,7 +715,7 @@ DefaultIEW::checkStall(ThreadID tid)
}
void
DefaultIEW::checkSignalsAndUpdate(ThreadID tid)
IEW::checkSignalsAndUpdate(ThreadID tid)
{
// Check if there's a squash signal, squash if there is
// Check stall signals, block if there is.
@@ -775,7 +778,7 @@ DefaultIEW::checkSignalsAndUpdate(ThreadID tid)
}
void
DefaultIEW::sortInsts()
IEW::sortInsts()
{
int insts_from_rename = fromRename->size;
#ifdef DEBUG
@@ -788,7 +791,7 @@ DefaultIEW::sortInsts()
}
void
DefaultIEW::emptyRenameInsts(ThreadID tid)
IEW::emptyRenameInsts(ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Removing incoming rename instructions\n", tid);
@@ -809,34 +812,34 @@ DefaultIEW::emptyRenameInsts(ThreadID tid)
}
void
DefaultIEW::wakeCPU()
IEW::wakeCPU()
{
cpu->wakeCPU();
}
void
DefaultIEW::activityThisCycle()
IEW::activityThisCycle()
{
DPRINTF(Activity, "Activity this cycle.\n");
cpu->activityThisCycle();
}
void
DefaultIEW::activateStage()
IEW::activateStage()
{
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(FullO3CPU::IEWIdx);
cpu->activateStage(CPU::IEWIdx);
}
void
DefaultIEW::deactivateStage()
IEW::deactivateStage()
{
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::IEWIdx);
cpu->deactivateStage(CPU::IEWIdx);
}
void
DefaultIEW::dispatch(ThreadID tid)
IEW::dispatch(ThreadID tid)
{
// If status is Running or idle,
// call dispatchInsts()
@@ -883,17 +886,17 @@ DefaultIEW::dispatch(ThreadID tid)
}
void
DefaultIEW::dispatchInsts(ThreadID tid)
IEW::dispatchInsts(ThreadID tid)
{
// Obtain instructions from skid buffer if unblocking, or queue from rename
// otherwise.
std::queue<O3DynInstPtr> &insts_to_dispatch =
std::queue<DynInstPtr> &insts_to_dispatch =
dispatchStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
int insts_to_add = insts_to_dispatch.size();
O3DynInstPtr inst;
DynInstPtr inst;
bool add_to_iq = false;
int dis_num_inst = 0;
@@ -1119,7 +1122,7 @@ DefaultIEW::dispatchInsts(ThreadID tid)
}
void
DefaultIEW::printAvailableInsts()
IEW::printAvailableInsts()
{
int inst = 0;
@@ -1141,7 +1144,7 @@ DefaultIEW::printAvailableInsts()
}
void
DefaultIEW::executeInsts()
IEW::executeInsts()
{
wbNumInst = 0;
wbCycle = 0;
@@ -1166,7 +1169,7 @@ DefaultIEW::executeInsts()
DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
O3DynInstPtr inst = instQueue.getInstToExecute();
DynInstPtr inst = instQueue.getInstToExecute();
DPRINTF(IEW, "Execute: Processing PC %s, [tid:%i] [sn:%llu].\n",
inst->pcState(), inst->threadNumber,inst->seqNum);
@@ -1330,7 +1333,7 @@ DefaultIEW::executeInsts()
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
O3DynInstPtr violator;
DynInstPtr violator;
violator = ldstQueue.getMemDepViolator(tid);
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
@@ -1354,7 +1357,7 @@ DefaultIEW::executeInsts()
if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());
O3DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
@@ -1387,7 +1390,7 @@ DefaultIEW::executeInsts()
}
void
DefaultIEW::writebackInsts()
IEW::writebackInsts()
{
// Loop through the head of the time buffer and wake any
// dependents. These instructions are about to write back. Also
@@ -1396,7 +1399,7 @@ DefaultIEW::writebackInsts()
// as part of backwards communication.
for (int inst_num = 0; inst_num < wbWidth &&
toCommit->insts[inst_num]; inst_num++) {
O3DynInstPtr inst = toCommit->insts[inst_num];
DynInstPtr inst = toCommit->insts[inst_num];
ThreadID tid = inst->threadNumber;
DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %s.\n",
@@ -1437,7 +1440,7 @@ DefaultIEW::writebackInsts()
}
void
DefaultIEW::tick()
IEW::tick()
{
wbNumInst = 0;
wbCycle = 0;
@@ -1566,7 +1569,7 @@ DefaultIEW::tick()
}
void
DefaultIEW::updateExeInstStats(const O3DynInstPtr& inst)
IEW::updateExeInstStats(const DynInstPtr& inst)
{
ThreadID tid = inst->threadNumber;
@@ -1597,7 +1600,7 @@ DefaultIEW::updateExeInstStats(const O3DynInstPtr& inst)
}
void
DefaultIEW::checkMisprediction(const O3DynInstPtr& inst)
IEW::checkMisprediction(const DynInstPtr& inst)
{
ThreadID tid = inst->threadNumber;
@@ -1632,3 +1635,5 @@ DefaultIEW::checkMisprediction(const O3DynInstPtr& inst)
}
}
}
} // namespace o3

View File

@@ -55,11 +55,15 @@
#include "debug/IEW.hh"
#include "sim/probe/probe.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
namespace o3
{
class FUPool;
/**
* DefaultIEW handles both single threaded and SMT IEW
* IEW handles both single threaded and SMT IEW
* (issue/execute/writeback). It handles the dispatching of
* instructions to the LSQ/IQ as part of the issue stage, and has the
* IQ try to issue instructions each cycle. The execute latency is
@@ -77,7 +81,7 @@ class FUPool;
* up any dependents, and marking the register ready on the
* scoreboard.
*/
class DefaultIEW
class IEW
{
public:
/** Overall IEW stage status. Used to determine if the CPU can
@@ -104,25 +108,25 @@ class DefaultIEW
/** Overall stage status. */
Status _status;
/** Dispatch status. */
StageStatus dispatchStatus[O3MaxThreads];
StageStatus dispatchStatus[MaxThreads];
/** Execute status. */
StageStatus exeStatus;
/** Writeback status. */
StageStatus wbStatus;
/** Probe points. */
ProbePointArg<O3DynInstPtr> *ppMispredict;
ProbePointArg<O3DynInstPtr> *ppDispatch;
ProbePointArg<DynInstPtr> *ppMispredict;
ProbePointArg<DynInstPtr> *ppDispatch;
/** To probe when instruction execution begins. */
ProbePointArg<O3DynInstPtr> *ppExecute;
ProbePointArg<DynInstPtr> *ppExecute;
/** To probe when instruction execution is complete. */
ProbePointArg<O3DynInstPtr> *ppToCommit;
ProbePointArg<DynInstPtr> *ppToCommit;
public:
/** Constructs a DefaultIEW with the given parameters. */
DefaultIEW(FullO3CPU *_cpu, const DerivO3CPUParams &params);
/** Constructs an IEW with the given parameters. */
IEW(CPU *_cpu, const O3CPUParams &params);
/** Returns the name of the DefaultIEW stage. */
/** Returns the name of the IEW stage. */
std::string name() const;
/** Registers probes. */
@@ -135,13 +139,13 @@ class DefaultIEW
void clearStates(ThreadID tid);
/** Sets main time buffer used for backwards communication. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets time buffer for getting instructions coming from rename. */
void setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr);
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets time buffer to pass on instructions to commit. */
void setIEWQueue(TimeBuffer<O3Comm::IEWStruct> *iq_ptr);
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
@@ -162,24 +166,24 @@ class DefaultIEW
void squash(ThreadID tid);
/** Wakes all dependents of a completed instruction. */
void wakeDependents(const O3DynInstPtr &inst);
void wakeDependents(const DynInstPtr &inst);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
*/
void rescheduleMemInst(const O3DynInstPtr &inst);
void rescheduleMemInst(const DynInstPtr &inst);
/** Re-executes all rescheduled memory instructions. */
void replayMemInst(const O3DynInstPtr &inst);
void replayMemInst(const DynInstPtr &inst);
/** Moves memory instruction onto the list of cache blocked instructions */
void blockMemInst(const O3DynInstPtr &inst);
void blockMemInst(const DynInstPtr &inst);
/** Notifies that the cache has become unblocked */
void cacheUnblocked();
/** Sends an instruction to commit through the time buffer. */
void instToCommit(const O3DynInstPtr &inst);
void instToCommit(const DynInstPtr &inst);
/** Inserts unused instructions of a thread into the skid buffer. */
void skidInsert(ThreadID tid);
@@ -217,7 +221,7 @@ class DefaultIEW
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
/** Check misprediction */
void checkMisprediction(const O3DynInstPtr &inst);
void checkMisprediction(const DynInstPtr &inst);
// hardware transactional memory
// For debugging purposes, it is useful to keep track of the most recent
@@ -233,12 +237,12 @@ class DefaultIEW
/** Sends commit proper information for a squash due to a branch
* mispredict.
*/
void squashDueToBranch(const O3DynInstPtr &inst, ThreadID tid);
void squashDueToBranch(const DynInstPtr &inst, ThreadID tid);
/** Sends commit proper information for a squash due to a memory order
* violation.
*/
void squashDueToMemOrder(const O3DynInstPtr &inst, ThreadID tid);
void squashDueToMemOrder(const DynInstPtr &inst, ThreadID tid);
/** Sets Dispatch to blocked, and signals back to other stages to block. */
void block(ThreadID tid);
@@ -292,53 +296,53 @@ class DefaultIEW
private:
/** Updates execution stats based on the instruction. */
void updateExeInstStats(const O3DynInstPtr &inst);
void updateExeInstStats(const DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to write information heading to previous stages. */
TimeBuffer<O3Comm::TimeStruct>::wire toFetch;
TimeBuffer<TimeStruct>::wire toFetch;
/** Wire to get commit's output from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
TimeBuffer<O3Comm::TimeStruct>::wire toRename;
TimeBuffer<TimeStruct>::wire toRename;
/** Rename instruction queue interface. */
TimeBuffer<O3Comm::RenameStruct> *renameQueue;
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to get rename's output from rename queue. */
TimeBuffer<O3Comm::RenameStruct>::wire fromRename;
TimeBuffer<RenameStruct>::wire fromRename;
/** Issue stage queue. */
TimeBuffer<O3Comm::IssueStruct> issueToExecQueue;
TimeBuffer<IssueStruct> issueToExecQueue;
/** Wire to read information from the issue stage time queue. */
TimeBuffer<O3Comm::IssueStruct>::wire fromIssue;
TimeBuffer<IssueStruct>::wire fromIssue;
/**
* IEW stage time buffer. Holds ROB indices of instructions that
* can be marked as completed.
*/
TimeBuffer<O3Comm::IEWStruct> *iewQueue;
TimeBuffer<IEWStruct> *iewQueue;
/** Wire to write information heading to commit. */
TimeBuffer<O3Comm::IEWStruct>::wire toCommit;
TimeBuffer<IEWStruct>::wire toCommit;
/** Queue of all instructions coming from rename this cycle. */
std::queue<O3DynInstPtr> insts[O3MaxThreads];
std::queue<DynInstPtr> insts[MaxThreads];
/** Skid buffer between rename and IEW. */
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
std::queue<DynInstPtr> skidBuffer[MaxThreads];
/** Scoreboard pointer. */
Scoreboard* scoreboard;
private:
/** CPU pointer. */
FullO3CPU *cpu;
CPU *cpu;
/** Records if IEW has written to the time buffer this cycle, so that the
* CPU can deschedule itself if there is no activity.
@@ -364,7 +368,7 @@ class DefaultIEW
private:
/** Records if there is a fetch redirect on this cycle for each thread. */
bool fetchRedirect[O3MaxThreads];
bool fetchRedirect[MaxThreads];
/** Records if the queues have been changed (inserted or issued insts),
* so that IEW knows to broadcast the updated amount of free entries.
@@ -415,7 +419,7 @@ class DefaultIEW
struct IEWStats : public Stats::Group
{
IEWStats(FullO3CPU *cpu);
IEWStats(CPU *cpu);
/** Stat for total number of idle cycles. */
Stats::Scalar idleCycles;
@@ -451,7 +455,7 @@ class DefaultIEW
struct ExecutedInstStats : public Stats::Group
{
ExecutedInstStats(FullO3CPU *cpu);
ExecutedInstStats(CPU *cpu);
/** Stat for total number of executed instructions. */
Stats::Scalar numInsts;
@@ -489,4 +493,6 @@ class DefaultIEW
} iewStats;
};
} // namespace o3
#endif // __CPU_O3_IEW_HH__

View File

@@ -50,14 +50,17 @@
#include "cpu/o3/limits.hh"
#include "debug/IQ.hh"
#include "enums/OpClass.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
#include "sim/core.hh"
// clang complains about std::set being overloaded with Packet::set if
// we open up the entire namespace std
using std::list;
InstructionQueue::FUCompletion::FUCompletion(const O3DynInstPtr &_inst,
namespace o3
{
InstructionQueue::FUCompletion::FUCompletion(const DynInstPtr &_inst,
int fu_idx, InstructionQueue *iq_ptr)
: Event(Stat_Event_Pri, AutoDelete),
inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
@@ -78,8 +81,8 @@ InstructionQueue::FUCompletion::description() const
return "Functional unit completion";
}
InstructionQueue::InstructionQueue(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params)
InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr,
const O3CPUParams &params)
: cpu(cpu_ptr),
iewStage(iew_ptr),
fuPool(params.fuPool),
@@ -109,7 +112,7 @@ InstructionQueue::InstructionQueue(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
regScoreboard.resize(numPhysRegs);
//Initialize Mem Dependence Units
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
memDepUnit[tid].init(params, tid, cpu_ptr);
memDepUnit[tid].setIQ(this);
}
@@ -147,7 +150,7 @@ InstructionQueue::InstructionQueue(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
DPRINTF(IQ, "IQ sharing policy set to Threshold:"
"%i entries per thread.\n",thresholdIQ);
}
for (ThreadID tid = numThreads; tid < O3MaxThreads; tid++) {
for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
maxEntries[tid] = 0;
}
}
@@ -167,7 +170,7 @@ InstructionQueue::name() const
return cpu->name() + ".iq";
}
InstructionQueue::IQStats::IQStats(FullO3CPU *cpu, const unsigned &total_width)
InstructionQueue::IQStats::IQStats(CPU *cpu, const unsigned &total_width)
: Stats::Group(cpu),
ADD_STAT(instsAdded, Stats::Units::Count::get(),
"Number of instructions added to the IQ (excludes non-spec)"),
@@ -384,7 +387,7 @@ void
InstructionQueue::resetState()
{
//Initialize thread IQ counts
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
count[tid] = 0;
instList[tid].clear();
}
@@ -401,7 +404,7 @@ InstructionQueue::resetState()
regScoreboard[i] = false;
}
for (ThreadID tid = 0; tid < O3MaxThreads; ++tid) {
for (ThreadID tid = 0; tid < MaxThreads; ++tid) {
squashedSeqNum[tid] = 0;
}
@@ -426,14 +429,13 @@ InstructionQueue::setActiveThreads(list<ThreadID> *at_ptr)
}
void
InstructionQueue::setIssueToExecuteQueue(
TimeBuffer<O3Comm::IssueStruct> *i2e_ptr)
InstructionQueue::setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2e_ptr)
{
issueToExecuteQueue = i2e_ptr;
}
void
InstructionQueue::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
InstructionQueue::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
@@ -551,7 +553,7 @@ InstructionQueue::hasReadyInsts()
}
void
InstructionQueue::insert(const O3DynInstPtr &new_inst)
InstructionQueue::insert(const DynInstPtr &new_inst)
{
if (new_inst->isFloating()) {
iqIOStats.fpInstQueueWrites++;
@@ -596,7 +598,7 @@ InstructionQueue::insert(const O3DynInstPtr &new_inst)
}
void
InstructionQueue::insertNonSpec(const O3DynInstPtr &new_inst)
InstructionQueue::insertNonSpec(const DynInstPtr &new_inst)
{
// @todo: Clean up this code; can do it by setting inst as unable
// to issue, then calling normal insert on the inst.
@@ -642,18 +644,18 @@ InstructionQueue::insertNonSpec(const O3DynInstPtr &new_inst)
}
void
InstructionQueue::insertBarrier(const O3DynInstPtr &barr_inst)
InstructionQueue::insertBarrier(const DynInstPtr &barr_inst)
{
memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
insertNonSpec(barr_inst);
}
O3DynInstPtr
DynInstPtr
InstructionQueue::getInstToExecute()
{
assert(!instsToExecute.empty());
O3DynInstPtr inst = std::move(instsToExecute.front());
DynInstPtr inst = std::move(instsToExecute.front());
instsToExecute.pop_front();
if (inst->isFloating()) {
iqIOStats.fpInstQueueReads++;
@@ -717,7 +719,7 @@ InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it)
}
void
InstructionQueue::processFUCompletion(const O3DynInstPtr &inst, int fu_idx)
InstructionQueue::processFUCompletion(const DynInstPtr &inst, int fu_idx)
{
DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
assert(!cpu->switchedOut());
@@ -745,9 +747,9 @@ InstructionQueue::scheduleReadyInsts()
DPRINTF(IQ, "Attempting to schedule ready instructions from "
"the IQ.\n");
O3Comm::IssueStruct *i2e_info = issueToExecuteQueue->access(0);
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
O3DynInstPtr mem_inst;
DynInstPtr mem_inst;
while ((mem_inst = getDeferredMemInstToExecute())) {
addReadyMemInst(mem_inst);
}
@@ -774,7 +776,7 @@ InstructionQueue::scheduleReadyInsts()
assert(!readyInsts[op_class].empty());
O3DynInstPtr issuing_inst = readyInsts[op_class].top();
DynInstPtr issuing_inst = readyInsts[op_class].top();
if (issuing_inst->isFloating()) {
iqIOStats.fpInstQueueReads++;
@@ -951,7 +953,7 @@ InstructionQueue::commit(const InstSeqNum &inst, ThreadID tid)
}
int
InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst)
InstructionQueue::wakeDependents(const DynInstPtr &completed_inst)
{
int dependents = 0;
@@ -1019,7 +1021,7 @@ InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst)
//Go through the dependency chain, marking the registers as
//ready within the waiting instructions.
O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
while (dep_inst) {
DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
@@ -1050,7 +1052,7 @@ InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst)
}
void
InstructionQueue::addReadyMemInst(const O3DynInstPtr &ready_inst)
InstructionQueue::addReadyMemInst(const DynInstPtr &ready_inst)
{
OpClass op_class = ready_inst->opClass();
@@ -1072,7 +1074,7 @@ InstructionQueue::addReadyMemInst(const O3DynInstPtr &ready_inst)
}
void
InstructionQueue::rescheduleMemInst(const O3DynInstPtr &resched_inst)
InstructionQueue::rescheduleMemInst(const DynInstPtr &resched_inst)
{
DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
@@ -1085,19 +1087,19 @@ InstructionQueue::rescheduleMemInst(const O3DynInstPtr &resched_inst)
}
void
InstructionQueue::replayMemInst(const O3DynInstPtr &replay_inst)
InstructionQueue::replayMemInst(const DynInstPtr &replay_inst)
{
memDepUnit[replay_inst->threadNumber].replay();
}
void
InstructionQueue::deferMemInst(const O3DynInstPtr &deferred_inst)
InstructionQueue::deferMemInst(const DynInstPtr &deferred_inst)
{
deferredMemInsts.push_back(deferred_inst);
}
void
InstructionQueue::blockMemInst(const O3DynInstPtr &blocked_inst)
InstructionQueue::blockMemInst(const DynInstPtr &blocked_inst)
{
blocked_inst->clearIssued();
blocked_inst->clearCanIssue();
@@ -1112,13 +1114,13 @@ InstructionQueue::cacheUnblocked()
cpu->wakeCPU();
}
O3DynInstPtr
DynInstPtr
InstructionQueue::getDeferredMemInstToExecute()
{
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
O3DynInstPtr mem_inst = std::move(*it);
DynInstPtr mem_inst = std::move(*it);
deferredMemInsts.erase(it);
return mem_inst;
}
@@ -1126,21 +1128,21 @@ InstructionQueue::getDeferredMemInstToExecute()
return nullptr;
}
O3DynInstPtr
DynInstPtr
InstructionQueue::getBlockedMemInstToExecute()
{
if (retryMemInsts.empty()) {
return nullptr;
} else {
O3DynInstPtr mem_inst = std::move(retryMemInsts.front());
DynInstPtr mem_inst = std::move(retryMemInsts.front());
retryMemInsts.pop_front();
return mem_inst;
}
}
void
InstructionQueue::violation(const O3DynInstPtr &store,
const O3DynInstPtr &faulting_load)
InstructionQueue::violation(const DynInstPtr &store,
const DynInstPtr &faulting_load)
{
iqIOStats.intInstQueueWrites++;
memDepUnit[store->threadNumber].violation(store, faulting_load);
@@ -1177,7 +1179,7 @@ InstructionQueue::doSquash(ThreadID tid)
while (squash_it != instList[tid].end() &&
(*squash_it)->seqNum > squashedSeqNum[tid]) {
O3DynInstPtr squashed_inst = (*squash_it);
DynInstPtr squashed_inst = (*squash_it);
if (squashed_inst->isFloating()) {
iqIOStats.fpInstQueueWrites++;
} else if (squashed_inst->isVector()) {
@@ -1283,10 +1285,10 @@ InstructionQueue::doSquash(ThreadID tid)
// IQ clears out the heads of the dependency graph only when
// instructions reach writeback stage. If an instruction is squashed
// before writeback stage, its head of dependency graph would not be
// cleared out; it holds the instruction's O3DynInstPtr. This prevents
// freeing the squashed instruction's DynInst.
// Thus, we need to manually clear out the squashed instructions' heads
// of dependency graph.
// cleared out; it holds the instruction's DynInstPtr. This
// prevents freeing the squashed instruction's DynInst.
// Thus, we need to manually clear out the squashed instructions'
// heads of dependency graph.
for (int dest_reg_idx = 0;
dest_reg_idx < squashed_inst->numDestRegs();
dest_reg_idx++)
@@ -1306,13 +1308,13 @@ InstructionQueue::doSquash(ThreadID tid)
bool
InstructionQueue::PqCompare::operator()(
const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const
const DynInstPtr &lhs, const DynInstPtr &rhs) const
{
return lhs->seqNum > rhs->seqNum;
}
bool
InstructionQueue::addToDependents(const O3DynInstPtr &new_inst)
InstructionQueue::addToDependents(const DynInstPtr &new_inst)
{
// Loop through the instruction's source registers, adding
// them to the dependency list if they are not ready.
@@ -1359,7 +1361,7 @@ InstructionQueue::addToDependents(const O3DynInstPtr &new_inst)
}
void
InstructionQueue::addToProducers(const O3DynInstPtr &new_inst)
InstructionQueue::addToProducers(const DynInstPtr &new_inst)
{
// Nothing really needs to be marked when an instruction becomes
// the producer of a register's value, but for convenience a ptr
@@ -1394,7 +1396,7 @@ InstructionQueue::addToProducers(const O3DynInstPtr &new_inst)
}
void
InstructionQueue::addIfReady(const O3DynInstPtr &inst)
InstructionQueue::addIfReady(const DynInstPtr &inst)
{
// If the instruction now has all of its source registers
// available, then add it to the list of ready instructions.
@@ -1563,3 +1565,5 @@ InstructionQueue::dumpInsts()
++num;
}
}
} // namespace o3

View File

@@ -61,11 +61,15 @@
#include "enums/SMTQueuePolicy.hh"
#include "sim/eventq.hh"
struct DerivO3CPUParams;
class FUPool;
struct O3CPUParams;
class MemInterface;
class DefaultIEW;
class FullO3CPU;
namespace o3
{
class FUPool;
class CPU;
class IEW;
/**
* A standard instruction queue class. It holds ready instructions, in
@@ -88,14 +92,14 @@ class InstructionQueue
{
public:
// Typedef of iterator through the list of instructions.
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
typedef typename std::list<DynInstPtr>::iterator ListIt;
/** FU completion event class. */
class FUCompletion : public Event
{
private:
/** Executing instruction. */
O3DynInstPtr inst;
DynInstPtr inst;
/** Index of the FU used for executing. */
int fuIdx;
@@ -110,7 +114,7 @@ class InstructionQueue
public:
/** Construct a FU completion event. */
FUCompletion(const O3DynInstPtr &_inst, int fu_idx,
FUCompletion(const DynInstPtr &_inst, int fu_idx,
InstructionQueue *iq_ptr);
virtual void process();
@@ -119,8 +123,7 @@ class InstructionQueue
};
/** Constructs an IQ. */
InstructionQueue(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params);
InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params);
/** Destructs the IQ. */
~InstructionQueue();
@@ -135,10 +138,10 @@ class InstructionQueue
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets the timer buffer between issue and execute. */
void setIssueToExecuteQueue(TimeBuffer<O3Comm::IssueStruct> *i2eQueue);
void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
/** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Determine if we are drained. */
bool isDrained() const;
@@ -171,43 +174,43 @@ class InstructionQueue
bool hasReadyInsts();
/** Inserts a new instruction into the IQ. */
void insert(const O3DynInstPtr &new_inst);
void insert(const DynInstPtr &new_inst);
/** Inserts a new, non-speculative instruction into the IQ. */
void insertNonSpec(const O3DynInstPtr &new_inst);
void insertNonSpec(const DynInstPtr &new_inst);
/** Inserts a memory or write barrier into the IQ to make sure
* loads and stores are ordered properly.
*/
void insertBarrier(const O3DynInstPtr &barr_inst);
void insertBarrier(const DynInstPtr &barr_inst);
/** Returns the oldest scheduled instruction, and removes it from
* the list of instructions waiting to execute.
*/
O3DynInstPtr getInstToExecute();
DynInstPtr getInstToExecute();
/** Gets a memory instruction that was deferred due to a delayed DTB
* translation if it is now ready to execute. NULL if none available.
*/
O3DynInstPtr getDeferredMemInstToExecute();
DynInstPtr getDeferredMemInstToExecute();
/** Gets a memory instruction that was blocked on the cache. NULL if none
* available.
*/
O3DynInstPtr getBlockedMemInstToExecute();
DynInstPtr getBlockedMemInstToExecute();
/**
* Records the instruction as the producer of a register without
* adding it to the rest of the IQ.
*/
void
recordProducer(const O3DynInstPtr &inst)
recordProducer(const DynInstPtr &inst)
{
addToProducers(inst);
}
/** Process FU completion event. */
void processFUCompletion(const O3DynInstPtr &inst, int fu_idx);
void processFUCompletion(const DynInstPtr &inst, int fu_idx);
/**
* Schedules ready instructions, adding the ready ones (oldest first) to
@@ -225,35 +228,34 @@ class InstructionQueue
void commit(const InstSeqNum &inst, ThreadID tid = 0);
/** Wakes all dependents of a completed instruction. */
int wakeDependents(const O3DynInstPtr &completed_inst);
int wakeDependents(const DynInstPtr &completed_inst);
/** Adds a ready memory instruction to the ready list. */
void addReadyMemInst(const O3DynInstPtr &ready_inst);
void addReadyMemInst(const DynInstPtr &ready_inst);
/**
* Reschedules a memory instruction. It will be ready to issue once
* replayMemInst() is called.
*/
void rescheduleMemInst(const O3DynInstPtr &resched_inst);
void rescheduleMemInst(const DynInstPtr &resched_inst);
/** Replays a memory instruction. It must be rescheduled first. */
void replayMemInst(const O3DynInstPtr &replay_inst);
void replayMemInst(const DynInstPtr &replay_inst);
/**
* Defers a memory instruction when its DTB translation incurs a hw
* page table walk.
*/
void deferMemInst(const O3DynInstPtr &deferred_inst);
void deferMemInst(const DynInstPtr &deferred_inst);
/** Defers a memory instruction when it is cache blocked. */
void blockMemInst(const O3DynInstPtr &blocked_inst);
void blockMemInst(const DynInstPtr &blocked_inst);
/** Notify instruction queue that a previous blockage has resolved */
void cacheUnblocked();
/** Indicates an ordering violation between a store and a load. */
void violation(const O3DynInstPtr &store,
const O3DynInstPtr &faulting_load);
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load);
/**
* Squashes instructions for a thread. Squashing information is obtained
@@ -276,29 +278,29 @@ class InstructionQueue
/////////////////////////
/** Pointer to the CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Cache interface. */
MemInterface *dcacheInterface;
/** Pointer to IEW stage. */
DefaultIEW *iewStage;
IEW *iewStage;
/** The memory dependence unit, which tracks/predicts memory dependences
* between instructions.
*/
MemDepUnit memDepUnit[O3MaxThreads];
MemDepUnit memDepUnit[MaxThreads];
/** The queue to the execute stage. Issued instructions will be written
* into it.
*/
TimeBuffer<O3Comm::IssueStruct> *issueToExecuteQueue;
TimeBuffer<IssueStruct> *issueToExecuteQueue;
/** The backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to read information from timebuffer. */
typename TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Function unit pool. */
FUPool *fuPool;
@@ -308,23 +310,23 @@ class InstructionQueue
//////////////////////////////////////
/** List of all the instructions in the IQ (some of which may be issued). */
std::list<O3DynInstPtr> instList[O3MaxThreads];
std::list<DynInstPtr> instList[MaxThreads];
/** List of instructions that are ready to be executed. */
std::list<O3DynInstPtr> instsToExecute;
std::list<DynInstPtr> instsToExecute;
/** List of instructions waiting for their DTB translation to
* complete (hw page table walk in progress).
*/
std::list<O3DynInstPtr> deferredMemInsts;
std::list<DynInstPtr> deferredMemInsts;
/** List of instructions that have been cache blocked. */
std::list<O3DynInstPtr> blockedMemInsts;
std::list<DynInstPtr> blockedMemInsts;
/** List of instructions that were cache blocked, but a retry has been seen
* since, so they can now be retried. They may fail again and go back on the
* blocked list.
*/
std::list<O3DynInstPtr> retryMemInsts;
std::list<DynInstPtr> retryMemInsts;
/**
* Struct for comparing entries to be added to the priority queue.
@@ -335,12 +337,11 @@ class InstructionQueue
*/
struct PqCompare
{
bool operator()(const O3DynInstPtr &lhs,
const O3DynInstPtr &rhs) const;
bool operator()(const DynInstPtr &lhs, const DynInstPtr &rhs) const;
};
typedef std::priority_queue<
O3DynInstPtr, std::vector<O3DynInstPtr>, PqCompare> ReadyInstQueue;
DynInstPtr, std::vector<DynInstPtr>, PqCompare> ReadyInstQueue;
/** List of ready instructions, per op class. They are separated by op
* class to allow for easy mapping to FUs.
@@ -354,9 +355,9 @@ class InstructionQueue
* the sequence number will be available. Thus it is most efficient to be
* able to search by the sequence number alone.
*/
std::map<InstSeqNum, O3DynInstPtr> nonSpecInsts;
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
typedef typename std::map<InstSeqNum, O3DynInstPtr>::iterator NonSpecMapIt;
typedef std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
/** Entry for the list age ordering by op class. */
struct ListOrderEntry
@@ -393,7 +394,7 @@ class InstructionQueue
*/
void moveToYoungerInst(ListOrderIt age_order_it);
DependencyGraph<O3DynInstPtr> dependGraph;
DependencyGraph<DynInstPtr> dependGraph;
//////////////////////////////////////
// Various parameters
@@ -409,10 +410,10 @@ class InstructionQueue
std::list<ThreadID> *activeThreads;
/** Per Thread IQ count */
unsigned count[O3MaxThreads];
unsigned count[MaxThreads];
/** Max IQ Entries Per Thread */
unsigned maxEntries[O3MaxThreads];
unsigned maxEntries[MaxThreads];
/** Number of free IQ entries left. */
unsigned freeEntries;
@@ -435,7 +436,7 @@ class InstructionQueue
Cycles commitToIEWDelay;
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum[O3MaxThreads];
InstSeqNum squashedSeqNum[MaxThreads];
/** A cache of the recently woken registers. It is 1 if the register
* has been woken up recently, and 0 if the register has been added
@@ -446,13 +447,13 @@ class InstructionQueue
std::vector<bool> regScoreboard;
/** Adds an instruction to the dependency graph, as a consumer. */
bool addToDependents(const O3DynInstPtr &new_inst);
bool addToDependents(const DynInstPtr &new_inst);
/** Adds an instruction to the dependency graph, as a producer. */
void addToProducers(const O3DynInstPtr &new_inst);
void addToProducers(const DynInstPtr &new_inst);
/** Moves an instruction to the ready queue if it is ready. */
void addIfReady(const O3DynInstPtr &inst);
void addIfReady(const DynInstPtr &inst);
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function
@@ -473,7 +474,7 @@ class InstructionQueue
struct IQStats : public Stats::Group
{
IQStats(FullO3CPU *cpu, const unsigned &total_width);
IQStats(CPU *cpu, const unsigned &total_width);
/** Stat for number of instructions added. */
Stats::Scalar instsAdded;
/** Stat for number of non-speculative instructions added. */
@@ -554,4 +555,6 @@ class InstructionQueue
} iqIOStats;
};
} // namespace o3
#endif //__CPU_O3_INST_QUEUE_HH__

View File

@@ -28,7 +28,12 @@
#ifndef __CPU_O3_LIMITS_HH__
#define __CPU_O3_LIMITS_HH__
static constexpr int O3MaxWidth = 12;
static constexpr int O3MaxThreads = 4;
namespace o3
{
static constexpr int MaxWidth = 12;
static constexpr int MaxThreads = 4;
} // namespace o3
#endif // __CPU_O3_LIMITS_HH__

View File

@@ -56,7 +56,10 @@
#include "debug/HtmCpu.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
namespace o3
{
LSQ::LSQSenderState::LSQSenderState(LSQRequest *request, bool is_load) :
_request(request), isLoad(is_load), needWB(is_load)
@@ -68,12 +71,11 @@ LSQ::LSQSenderState::contextId()
return inst->contextId();
}
LSQ::DcachePort::DcachePort(LSQ *_lsq, FullO3CPU *_cpu) :
LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) :
RequestPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), cpu(_cpu)
{}
LSQ::LSQ(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params)
LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params)
: cpu(cpu_ptr), iewStage(iew_ptr),
_cacheBlocked(false),
cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
@@ -88,7 +90,7 @@ LSQ::LSQ(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
dcachePort(this, cpu_ptr),
numThreads(params.numThreads)
{
assert(numThreads > 0 && numThreads <= O3MaxThreads);
assert(numThreads > 0 && numThreads <= MaxThreads);
//**********************************************
//************ Handle SMT Parameters ***********
@@ -221,7 +223,7 @@ LSQ::cachePortBusy(bool is_load)
}
void
LSQ::insertLoad(const O3DynInstPtr &load_inst)
LSQ::insertLoad(const DynInstPtr &load_inst)
{
ThreadID tid = load_inst->threadNumber;
@@ -229,7 +231,7 @@ LSQ::insertLoad(const O3DynInstPtr &load_inst)
}
void
LSQ::insertStore(const O3DynInstPtr &store_inst)
LSQ::insertStore(const DynInstPtr &store_inst)
{
ThreadID tid = store_inst->threadNumber;
@@ -237,7 +239,7 @@ LSQ::insertStore(const O3DynInstPtr &store_inst)
}
Fault
LSQ::executeLoad(const O3DynInstPtr &inst)
LSQ::executeLoad(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -245,7 +247,7 @@ LSQ::executeLoad(const O3DynInstPtr &inst)
}
Fault
LSQ::executeStore(const O3DynInstPtr &inst)
LSQ::executeStore(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -307,7 +309,7 @@ LSQ::violation()
bool LSQ::violation(ThreadID tid) { return thread.at(tid).violation(); }
O3DynInstPtr
DynInstPtr
LSQ::getMemDepViolator(ThreadID tid)
{
return thread.at(tid).getMemDepViolator();
@@ -766,7 +768,7 @@ LSQ::dumpInsts(ThreadID tid) const
}
Fault
LSQ::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
LSQ::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags, uint64_t *res,
AtomicOpFunctorPtr amo_op, const std::vector<bool>& byte_enable)
{
@@ -854,7 +856,7 @@ LSQ::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
void
LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
::ThreadContext* tc, BaseTLB::Mode mode)
{
_fault.push_back(fault);
numInTranslationFragments = 0;
@@ -886,7 +888,7 @@ LSQ::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
void
LSQ::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
::ThreadContext* tc, BaseTLB::Mode mode)
{
int i;
for (i = 0; i < _requests.size() && _requests[i] != req; i++);
@@ -1035,7 +1037,7 @@ LSQ::SplitDataRequest::initiateTranslation()
}
LSQ::LSQRequest::LSQRequest(
LSQUnit *port, const O3DynInstPtr& inst, bool isLoad) :
LSQUnit *port, const DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
@@ -1049,7 +1051,7 @@ LSQ::LSQRequest::LSQRequest(
}
LSQ::LSQRequest::LSQRequest(
LSQUnit *port, const O3DynInstPtr& inst, bool isLoad,
LSQUnit *port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op)
: _state(State::NotIssued), _senderState(nullptr),
@@ -1272,14 +1274,15 @@ LSQ::SplitDataRequest::sendPacketToCache()
}
Cycles
LSQ::SingleDataRequest::handleLocalAccess(ThreadContext *thread, PacketPtr pkt)
LSQ::SingleDataRequest::handleLocalAccess(
::ThreadContext *thread, PacketPtr pkt)
{
return pkt->req->localAccessor(thread, pkt);
}
Cycles
LSQ::SplitDataRequest::handleLocalAccess(
ThreadContext *thread, PacketPtr mainPkt)
::ThreadContext *thread, PacketPtr mainPkt)
{
Cycles delay(0);
unsigned offset = 0;
@@ -1363,7 +1366,7 @@ LSQ::DcachePort::recvReqRetry()
lsq->recvReqRetry();
}
LSQ::HtmCmdRequest::HtmCmdRequest(LSQUnit* port, const O3DynInstPtr& inst,
LSQ::HtmCmdRequest::HtmCmdRequest(LSQUnit* port, const DynInstPtr& inst,
const Request::Flags& flags_) :
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
nullptr, nullptr, nullptr)
@@ -1409,7 +1412,7 @@ LSQ::HtmCmdRequest::initiateTranslation()
void
LSQ::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode)
::ThreadContext* tc, BaseTLB::Mode mode)
{
panic("unexpected behaviour");
}
@@ -1429,3 +1432,5 @@ LSQ::write(LSQRequest* req, uint8_t *data, int store_idx)
return thread.at(tid).write(req, data, store_idx);
}
} // namespace o3

View File

@@ -59,10 +59,13 @@
#include "mem/port.hh"
#include "sim/sim_object.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
class FullO3CPU;
class DefaultIEW;
namespace o3
{
class CPU;
class IEW;
class LSQUnit;
class LSQ
@@ -81,7 +84,7 @@ class LSQ
public:
/** Instruction which initiated the access to memory. */
O3DynInstPtr inst;
DynInstPtr inst;
/** The main packet from a split load, used during writeback. */
PacketPtr mainPkt = nullptr;
/** A second packet from a split store that needs sending. */
@@ -121,11 +124,11 @@ class LSQ
/** Pointer to LSQ. */
LSQ *lsq;
FullO3CPU *cpu;
CPU *cpu;
public:
/** Constructs the D-cache port for the given LSQ and CPU. */
DcachePort(LSQ *_lsq, FullO3CPU *_cpu);
DcachePort(LSQ *_lsq, CPU *_cpu);
protected:
@@ -279,7 +282,7 @@ class LSQ
public:
LSQUnit& _port;
const O3DynInstPtr _inst;
const DynInstPtr _inst;
uint32_t _taskId;
PacketDataPtr _data;
std::vector<PacketPtr> _packets;
@@ -294,8 +297,8 @@ class LSQ
AtomicOpFunctorPtr _amo_op;
protected:
LSQUnit* lsqUnit() { return &_port; }
LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad);
LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad,
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad);
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr);
@@ -377,11 +380,7 @@ class LSQ
request()->setContext(context_id);
}
const O3DynInstPtr&
instruction()
{
return _inst;
}
const DynInstPtr& instruction() { return _inst; }
/** Set up virtual request.
* For previously allocated Request objects.
@@ -482,7 +481,7 @@ class LSQ
* Memory mapped IPR accesses
*/
virtual Cycles handleLocalAccess(
ThreadContext *thread, PacketPtr pkt) = 0;
::ThreadContext *thread, PacketPtr pkt) = 0;
/**
* Test if the request accesses a particular cache line.
@@ -655,7 +654,7 @@ class LSQ
using LSQRequest::_numOutstandingPackets;
using LSQRequest::_amo_op;
public:
SingleDataRequest(LSQUnit* port, const O3DynInstPtr& inst,
SingleDataRequest(LSQUnit* port, const DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr) :
@@ -665,11 +664,12 @@ class LSQ
virtual ~SingleDataRequest() {}
virtual void initiateTranslation();
virtual void finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode);
::ThreadContext* tc, BaseTLB::Mode mode);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void sendPacketToCache();
virtual void buildPackets();
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt);
virtual Cycles handleLocalAccess(
::ThreadContext *thread, PacketPtr pkt);
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
virtual std::string name() const { return "SingleDataRequest"; }
};
@@ -693,12 +693,12 @@ class LSQ
using LSQRequest::flags;
using LSQRequest::setState;
public:
HtmCmdRequest(LSQUnit* port, const O3DynInstPtr& inst,
HtmCmdRequest(LSQUnit* port, const DynInstPtr& inst,
const Request::Flags& flags_);
virtual ~HtmCmdRequest() {}
virtual void initiateTranslation();
virtual void finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode);
::ThreadContext* tc, BaseTLB::Mode mode);
virtual std::string name() const { return "HtmCmdRequest"; }
};
@@ -740,7 +740,7 @@ class LSQ
PacketPtr _mainPacket;
public:
SplitDataRequest(LSQUnit* port, const O3DynInstPtr& inst,
SplitDataRequest(LSQUnit* port, const DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags & flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr) :
@@ -764,13 +764,14 @@ class LSQ
}
}
virtual void finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode);
::ThreadContext* tc, BaseTLB::Mode mode);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void initiateTranslation();
virtual void sendPacketToCache();
virtual void buildPackets();
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt);
virtual Cycles handleLocalAccess(
::ThreadContext *thread, PacketPtr pkt);
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
virtual RequestPtr mainRequest();
@@ -779,8 +780,7 @@ class LSQ
};
/** Constructs an LSQ with the given parameters. */
LSQ(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params);
LSQ(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params);
/** Returns the name of the LSQ. */
std::string name() const;
@@ -802,15 +802,15 @@ class LSQ
void tick();
/** Inserts a load into the LSQ. */
void insertLoad(const O3DynInstPtr &load_inst);
void insertLoad(const DynInstPtr &load_inst);
/** Inserts a store into the LSQ. */
void insertStore(const O3DynInstPtr &store_inst);
void insertStore(const DynInstPtr &store_inst);
/** Executes a load. */
Fault executeLoad(const O3DynInstPtr &inst);
Fault executeLoad(const DynInstPtr &inst);
/** Executes a store. */
Fault executeStore(const O3DynInstPtr &inst);
Fault executeStore(const DynInstPtr &inst);
/**
* Commits loads up until the given sequence number for a specific thread.
@@ -845,7 +845,7 @@ class LSQ
bool violation(ThreadID tid);
/** Gets the instruction that caused the memory ordering violation. */
O3DynInstPtr getMemDepViolator(ThreadID tid);
DynInstPtr getMemDepViolator(ThreadID tid);
/** Returns the head index of the load queue for a specific thread. */
int getLoadHead(ThreadID tid);
@@ -983,16 +983,16 @@ class LSQ
void recvTimingSnoopReq(PacketPtr pkt);
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op,
const std::vector<bool>& byte_enable);
/** The CPU pointer. */
FullO3CPU *cpu;
CPU *cpu;
/** The IEW stage pointer. */
DefaultIEW *iewStage;
IEW *iewStage;
/** Is D-cache blocked? */
bool cacheBlocked() const;
@@ -1068,4 +1068,6 @@ class LSQ
ThreadID numThreads;
};
} // namespace o3
#endif // __CPU_O3_LSQ_HH__

View File

@@ -56,7 +56,10 @@
#include "mem/packet.hh"
#include "mem/request.hh"
LSQUnit::WritebackEvent::WritebackEvent(const O3DynInstPtr &_inst,
namespace o3
{
LSQUnit::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
PacketPtr _pkt, LSQUnit *lsq_ptr)
: Event(Default_Pri, AutoDelete),
inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
@@ -104,7 +107,7 @@ void
LSQUnit::completeDataAccess(PacketPtr pkt)
{
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
O3DynInstPtr inst = state->inst;
DynInstPtr inst = state->inst;
// hardware transactional memory
// sanity check
@@ -204,8 +207,8 @@ LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
}
void
LSQUnit::init(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params, LSQ *lsq_ptr, unsigned id)
LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
LSQ *lsq_ptr, unsigned id)
{
lsqID = id;
@@ -248,7 +251,7 @@ LSQUnit::resetState()
std::string
LSQUnit::name() const
{
if (O3MaxThreads == 1) {
if (MaxThreads == 1) {
return iewStage->name() + ".lsq";
} else {
return iewStage->name() + ".lsq.thread" + std::to_string(lsqID);
@@ -299,7 +302,7 @@ LSQUnit::takeOverFrom()
}
void
LSQUnit::insert(const O3DynInstPtr &inst)
LSQUnit::insert(const DynInstPtr &inst)
{
assert(inst->isMemRef());
@@ -315,7 +318,7 @@ LSQUnit::insert(const O3DynInstPtr &inst)
}
void
LSQUnit::insertLoad(const O3DynInstPtr &load_inst)
LSQUnit::insertLoad(const DynInstPtr &load_inst)
{
assert(!loadQueue.full());
assert(loads < loadQueue.capacity());
@@ -377,7 +380,7 @@ LSQUnit::insertLoad(const O3DynInstPtr &load_inst)
}
void
LSQUnit::insertStore(const O3DynInstPtr& store_inst)
LSQUnit::insertStore(const DynInstPtr& store_inst)
{
// Make sure it is not full before inserting an instruction.
assert(!storeQueue.full());
@@ -397,10 +400,10 @@ LSQUnit::insertStore(const O3DynInstPtr& store_inst)
++stores;
}
O3DynInstPtr
DynInstPtr
LSQUnit::getMemDepViolator()
{
O3DynInstPtr temp = memDepViolator;
DynInstPtr temp = memDepViolator;
memDepViolator = NULL;
@@ -437,7 +440,7 @@ LSQUnit::checkSnoop(PacketPtr pkt)
DPRINTF(LSQUnit, "Got snoop for address %#x\n", pkt->getAddr());
for (int x = 0; x < cpu->numContexts(); x++) {
ThreadContext *tc = cpu->getContext(x);
::ThreadContext *tc = cpu->getContext(x);
bool no_squash = cpu->thread[x]->noSquashFromTC;
cpu->thread[x]->noSquashFromTC = true;
TheISA::handleLockedSnoop(tc, pkt, cacheBlockMask);
@@ -451,7 +454,7 @@ LSQUnit::checkSnoop(PacketPtr pkt)
Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
O3DynInstPtr ld_inst = iter->instruction();
DynInstPtr ld_inst = iter->instruction();
assert(ld_inst);
LSQRequest *req = iter->request();
@@ -510,7 +513,7 @@ LSQUnit::checkSnoop(PacketPtr pkt)
Fault
LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
const O3DynInstPtr& inst)
const DynInstPtr& inst)
{
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
@@ -521,7 +524,7 @@ LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
* like the implementation that came before it, we're overly conservative.
*/
while (loadIt != loadQueue.end()) {
O3DynInstPtr ld_inst = loadIt->instruction();
DynInstPtr ld_inst = loadIt->instruction();
if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
++loadIt;
continue;
@@ -589,7 +592,7 @@ LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt,
Fault
LSQUnit::executeLoad(const O3DynInstPtr &inst)
LSQUnit::executeLoad(const DynInstPtr &inst)
{
// Execute a specific load.
Fault load_fault = NoFault;
@@ -655,7 +658,7 @@ LSQUnit::executeLoad(const O3DynInstPtr &inst)
}
Fault
LSQUnit::executeStore(const O3DynInstPtr &store_inst)
LSQUnit::executeStore(const DynInstPtr &store_inst)
{
// Make sure that a store exists.
assert(stores != 0);
@@ -805,7 +808,7 @@ LSQUnit::writebackStores()
assert(storeWBIt->hasRequest());
assert(!storeWBIt->committed());
O3DynInstPtr inst = storeWBIt->instruction();
DynInstPtr inst = storeWBIt->instruction();
LSQRequest* req = storeWBIt->request();
// Process store conditionals or store release after all previous
@@ -886,7 +889,7 @@ LSQUnit::writebackStores()
if (req->request()->isLocalAccess()) {
assert(!inst->isStoreConditional());
assert(!inst->inHtmTransactionalState());
ThreadContext *thread = cpu->tcBase(lsqID);
::ThreadContext *thread = cpu->tcBase(lsqID);
PacketPtr main_pkt = new Packet(req->mainRequest(),
MemCmd::WriteReq);
main_pkt->dataStatic(inst->memData);
@@ -1067,7 +1070,7 @@ LSQUnit::storePostSend()
}
void
LSQUnit::writeback(const O3DynInstPtr &inst, PacketPtr pkt)
LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt)
{
iewStage->wakeCPU();
@@ -1142,7 +1145,7 @@ LSQUnit::completeStore(typename StoreQueue::iterator store_idx)
/* We 'need' a copy here because we may clear the entry from the
* store queue. */
O3DynInstPtr store_inst = store_idx->instruction();
DynInstPtr store_inst = store_idx->instruction();
if (store_idx == storeQueue.begin()) {
do {
storeQueue.front().clear();
@@ -1248,7 +1251,7 @@ LSQUnit::dumpInsts() const
cprintf("Load queue: ");
for (const auto& e: loadQueue) {
const O3DynInstPtr &inst(e.instruction());
const DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
}
cprintf("\n");
@@ -1257,7 +1260,7 @@ LSQUnit::dumpInsts() const
cprintf("Store queue: ");
for (const auto& e: storeQueue) {
const O3DynInstPtr &inst(e.instruction());
const DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
}
@@ -1278,7 +1281,7 @@ Fault
LSQUnit::read(LSQRequest *req, int load_idx)
{
LQEntry& load_req = loadQueue[load_idx];
const O3DynInstPtr& load_inst = load_req.instruction();
const DynInstPtr& load_inst = load_req.instruction();
load_req.setRequest(req);
assert(load_inst);
@@ -1330,7 +1333,7 @@ LSQUnit::read(LSQRequest *req, int load_idx)
assert(!load_inst->inHtmTransactionalState());
load_inst->memData = new uint8_t[MaxDataBytes];
ThreadContext *thread = cpu->tcBase(lsqID);
::ThreadContext *thread = cpu->tcBase(lsqID);
PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
main_pkt->dataStatic(load_inst->memData);
@@ -1643,3 +1646,5 @@ LSQUnit::getStoreHeadSeqNum()
else
return 0;
}
} // namespace o3

View File

@@ -64,10 +64,13 @@
#include "mem/packet.hh"
#include "mem/port.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
#include "base/circular_queue.hh"
class DefaultIEW;
namespace o3
{
class IEW;
/**
* Class that implements the actual LQ and SQ for each specific
@@ -93,7 +96,7 @@ class LSQUnit
{
private:
/** The instruction. */
O3DynInstPtr inst;
DynInstPtr inst;
/** The request. */
LSQRequest* req = nullptr;
/** The size of the operation. */
@@ -123,7 +126,7 @@ class LSQUnit
}
void
set(const O3DynInstPtr& new_inst)
set(const DynInstPtr& new_inst)
{
assert(!_valid);
inst = new_inst;
@@ -139,7 +142,7 @@ class LSQUnit
bool valid() const { return _valid; }
uint32_t& size() { return _size; }
const uint32_t& size() const { return _size; }
const O3DynInstPtr& instruction() const { return inst; }
const DynInstPtr& instruction() const { return inst; }
/** @} */
};
@@ -168,11 +171,7 @@ class LSQUnit
std::memset(_data, 0, DataSize);
}
void
set(const O3DynInstPtr& inst)
{
LSQEntry::set(inst);
}
void set(const DynInstPtr& inst) { LSQEntry::set(inst); }
void
clear()
@@ -223,8 +222,8 @@ class LSQUnit
}
/** Initializes the LSQ unit with the specified number of entries. */
void init(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr,
const DerivO3CPUParams &params, LSQ *lsq_ptr, unsigned id);
void init(CPU *cpu_ptr, IEW *iew_ptr, const O3CPUParams &params,
LSQ *lsq_ptr, unsigned id);
/** Returns the name of the LSQ unit. */
std::string name() const;
@@ -239,11 +238,11 @@ class LSQUnit
void takeOverFrom();
/** Inserts an instruction. */
void insert(const O3DynInstPtr &inst);
void insert(const DynInstPtr &inst);
/** Inserts a load instruction. */
void insertLoad(const O3DynInstPtr &load_inst);
void insertLoad(const DynInstPtr &load_inst);
/** Inserts a store instruction. */
void insertStore(const O3DynInstPtr &store_inst);
void insertStore(const DynInstPtr &store_inst);
/** Check for ordering violations in the LSQ. For a store squash if we
* ever find a conflicting load. For a load, only squash if we
@@ -252,7 +251,7 @@ class LSQUnit
* @param inst the instruction to check
*/
Fault checkViolations(typename LoadQueue::iterator& loadIt,
const O3DynInstPtr& inst);
const DynInstPtr& inst);
/** Check if an incoming invalidate hits in the lsq on a load
* that might have issued out of order wrt another load because
@@ -261,11 +260,11 @@ class LSQUnit
void checkSnoop(PacketPtr pkt);
/** Executes a load instruction. */
Fault executeLoad(const O3DynInstPtr &inst);
Fault executeLoad(const DynInstPtr &inst);
/** Index-based overload; not implemented, so it panics if called. */
Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
/** Executes a store instruction. */
Fault executeStore(const O3DynInstPtr &inst);
Fault executeStore(const DynInstPtr &inst);
/** Commits the head load. */
void commitLoad();
@@ -291,7 +290,7 @@ class LSQUnit
bool violation() { return memDepViolator; }
/** Returns the memory ordering violator. */
O3DynInstPtr getMemDepViolator();
DynInstPtr getMemDepViolator();
/** Returns the number of free LQ entries. */
unsigned numFreeLoadEntries();
@@ -364,7 +363,7 @@ class LSQUnit
void resetState();
/** Writes back the instruction, sending it to IEW. */
void writeback(const O3DynInstPtr &inst, PacketPtr pkt);
void writeback(const DynInstPtr &inst, PacketPtr pkt);
/** Try to finish a previously blocked write back attempt */
void writebackBlockedStore();
@@ -393,10 +392,10 @@ class LSQUnit
private:
/** Pointer to the CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Pointer to the IEW stage. */
DefaultIEW *iewStage;
IEW *iewStage;
/** Pointer to the LSQ. */
LSQ *lsq;
@@ -446,7 +445,7 @@ class LSQUnit
{
public:
/** Constructs a writeback event. */
WritebackEvent(const O3DynInstPtr &_inst, PacketPtr pkt,
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
LSQUnit *lsq_ptr);
/** Processes the writeback event. */
@@ -457,7 +456,7 @@ class LSQUnit
private:
/** Instruction whose results are being written back. */
O3DynInstPtr inst;
DynInstPtr inst;
/** The packet that would have been sent to memory. */
PacketPtr pkt;
@@ -517,7 +516,7 @@ class LSQUnit
Addr cacheBlockMask;
/** Wire to read information from the issue stage time queue. */
typename TimeBuffer<O3Comm::IssueStruct>::wire fromIssue;
typename TimeBuffer<IssueStruct>::wire fromIssue;
/** Whether or not the LSQ is stalled. */
bool stalled;
@@ -538,7 +537,7 @@ class LSQUnit
bool storeInFlight;
/** The oldest load that caused a memory ordering violation. */
O3DynInstPtr memDepViolator;
DynInstPtr memDepViolator;
/** Flag for memory model. */
bool needsTSO;
@@ -601,4 +600,6 @@ class LSQUnit
typedef CircularQueue<SQEntry> SQueue;
};
} // namespace o3
#endif // __CPU_O3_LSQ_UNIT_HH__

View File

@@ -38,7 +38,10 @@
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/limits.hh"
#include "debug/MemDepUnit.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
namespace o3
{
#ifdef DEBUG
int MemDepUnit::MemDepEntry::memdep_count = 0;
@@ -48,7 +51,7 @@ int MemDepUnit::MemDepEntry::memdep_erase = 0;
/** Default constructor: null IQ pointer; the stats member is initialized
 *  with nullptr. */
MemDepUnit::MemDepUnit() : iqPtr(nullptr), stats(nullptr) {}
MemDepUnit::MemDepUnit(const DerivO3CPUParams &params)
MemDepUnit::MemDepUnit(const O3CPUParams &params)
: _name(params.name + ".memdepunit"),
depPred(params.store_set_clear_period, params.SSITSize,
params.LFSTSize),
@@ -60,7 +63,7 @@ MemDepUnit::MemDepUnit(const DerivO3CPUParams &params)
MemDepUnit::~MemDepUnit()
{
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
ListIt inst_list_it = instList[tid].begin();
@@ -83,7 +86,7 @@ MemDepUnit::~MemDepUnit()
}
void
MemDepUnit::init(const DerivO3CPUParams &params, ThreadID tid, FullO3CPU *cpu)
MemDepUnit::init(const O3CPUParams &params, ThreadID tid, CPU *cpu)
{
DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid);
@@ -116,7 +119,7 @@ MemDepUnit::isDrained() const
bool drained = instsToReplay.empty()
&& memDepHash.empty()
&& instsToReplay.empty();
for (int i = 0; i < O3MaxThreads; ++i)
for (int i = 0; i < MaxThreads; ++i)
drained = drained && instList[i].empty();
return drained;
@@ -127,7 +130,7 @@ MemDepUnit::drainSanityCheck() const
{
assert(instsToReplay.empty());
assert(memDepHash.empty());
for (int i = 0; i < O3MaxThreads; ++i)
for (int i = 0; i < MaxThreads; ++i)
assert(instList[i].empty());
assert(instsToReplay.empty());
assert(memDepHash.empty());
@@ -149,7 +152,7 @@ MemDepUnit::setIQ(InstructionQueue *iq_ptr)
}
void
MemDepUnit::insertBarrierSN(const O3DynInstPtr &barr_inst)
MemDepUnit::insertBarrierSN(const DynInstPtr &barr_inst)
{
InstSeqNum barr_sn = barr_inst->seqNum;
@@ -181,7 +184,7 @@ MemDepUnit::insertBarrierSN(const O3DynInstPtr &barr_inst)
}
void
MemDepUnit::insert(const O3DynInstPtr &inst)
MemDepUnit::insert(const DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -292,7 +295,7 @@ MemDepUnit::insert(const O3DynInstPtr &inst)
}
void
MemDepUnit::insertNonSpec(const O3DynInstPtr &inst)
MemDepUnit::insertNonSpec(const DynInstPtr &inst)
{
insertBarrier(inst);
@@ -314,7 +317,7 @@ MemDepUnit::insertNonSpec(const O3DynInstPtr &inst)
}
void
MemDepUnit::insertBarrier(const O3DynInstPtr &barr_inst)
MemDepUnit::insertBarrier(const DynInstPtr &barr_inst)
{
ThreadID tid = barr_inst->threadNumber;
@@ -336,7 +339,7 @@ MemDepUnit::insertBarrier(const O3DynInstPtr &barr_inst)
}
void
MemDepUnit::regsReady(const O3DynInstPtr &inst)
MemDepUnit::regsReady(const DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Marking registers as ready for "
"instruction PC %s [sn:%lli].\n",
@@ -358,7 +361,7 @@ MemDepUnit::regsReady(const O3DynInstPtr &inst)
}
void
MemDepUnit::nonSpecInstReady(const O3DynInstPtr &inst)
MemDepUnit::nonSpecInstReady(const DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Marking non speculative "
"instruction PC %s as ready [sn:%lli].\n",
@@ -370,7 +373,7 @@ MemDepUnit::nonSpecInstReady(const O3DynInstPtr &inst)
}
void
MemDepUnit::reschedule(const O3DynInstPtr &inst)
MemDepUnit::reschedule(const DynInstPtr &inst)
{
instsToReplay.push_back(inst);
}
@@ -378,7 +381,7 @@ MemDepUnit::reschedule(const O3DynInstPtr &inst)
void
MemDepUnit::replay()
{
O3DynInstPtr temp_inst;
DynInstPtr temp_inst;
// For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
@@ -396,7 +399,7 @@ MemDepUnit::replay()
}
void
MemDepUnit::completed(const O3DynInstPtr &inst)
MemDepUnit::completed(const DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
@@ -419,7 +422,7 @@ MemDepUnit::completed(const O3DynInstPtr &inst)
}
void
MemDepUnit::completeInst(const O3DynInstPtr &inst)
MemDepUnit::completeInst(const DynInstPtr &inst)
{
wakeDependents(inst);
completed(inst);
@@ -450,7 +453,7 @@ MemDepUnit::completeInst(const O3DynInstPtr &inst)
}
void
MemDepUnit::wakeDependents(const O3DynInstPtr &inst)
MemDepUnit::wakeDependents(const DynInstPtr &inst)
{
// Only stores, atomics and barriers have dependents.
if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() &&
@@ -485,7 +488,7 @@ MemDepUnit::wakeDependents(const O3DynInstPtr &inst)
inst_entry->dependInsts.clear();
}
MemDepUnit::MemDepEntry::MemDepEntry(const O3DynInstPtr &new_inst) :
MemDepUnit::MemDepEntry::MemDepEntry(const DynInstPtr &new_inst) :
inst(new_inst)
{
#ifdef DEBUG
@@ -562,8 +565,8 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, ThreadID tid)
}
void
MemDepUnit::violation(const O3DynInstPtr &store_inst,
const O3DynInstPtr &violating_load)
MemDepUnit::violation(const DynInstPtr &store_inst,
const DynInstPtr &violating_load)
{
DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
" load: %#x, store: %#x\n", violating_load->instAddr(),
@@ -573,7 +576,7 @@ MemDepUnit::violation(const O3DynInstPtr &store_inst,
}
void
MemDepUnit::issue(const O3DynInstPtr &inst)
MemDepUnit::issue(const DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
inst->instAddr(), inst->seqNum);
@@ -582,7 +585,7 @@ MemDepUnit::issue(const O3DynInstPtr &inst)
}
MemDepUnit::MemDepEntryPtr &
MemDepUnit::findInHash(const O3DynInstConstPtr &inst)
MemDepUnit::findInHash(const DynInstConstPtr &inst)
{
MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
@@ -606,7 +609,7 @@ MemDepUnit::moveToReady(MemDepEntryPtr &woken_inst_entry)
void
MemDepUnit::dumpLists()
{
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
cprintf("Instruction list %i size: %i\n",
tid, instList[tid].size());
@@ -632,3 +635,5 @@ MemDepUnit::dumpLists()
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
#endif
}
} // namespace o3

View File

@@ -65,10 +65,13 @@ struct SNHash
}
};
struct DerivO3CPUParams;
struct O3CPUParams;
namespace o3
{
class CPU;
class InstructionQueue;
class FullO3CPU;
/**
* Memory dependency unit class. This holds the memory dependence predictor.
@@ -91,7 +94,7 @@ class MemDepUnit
MemDepUnit();
/** Constructs a MemDepUnit with given parameters. */
MemDepUnit(const DerivO3CPUParams &params);
MemDepUnit(const O3CPUParams &params);
/** Frees up any memory allocated. */
~MemDepUnit();
@@ -100,7 +103,7 @@ class MemDepUnit
std::string name() const { return _name; }
/** Initializes the unit with parameters and a thread id. */
void init(const DerivO3CPUParams &params, ThreadID tid, FullO3CPU *cpu);
void init(const O3CPUParams &params, ThreadID tid, CPU *cpu);
/** Determine if we are drained. */
bool isDrained() const;
@@ -115,22 +118,22 @@ class MemDepUnit
void setIQ(InstructionQueue *iq_ptr);
/** Inserts a memory instruction. */
void insert(const O3DynInstPtr &inst);
void insert(const DynInstPtr &inst);
/** Inserts a non-speculative memory instruction. */
void insertNonSpec(const O3DynInstPtr &inst);
void insertNonSpec(const DynInstPtr &inst);
/** Inserts a barrier instruction. */
void insertBarrier(const O3DynInstPtr &barr_inst);
void insertBarrier(const DynInstPtr &barr_inst);
/** Indicate that an instruction has its registers ready. */
void regsReady(const O3DynInstPtr &inst);
void regsReady(const DynInstPtr &inst);
/** Indicate that a non-speculative instruction is ready. */
void nonSpecInstReady(const O3DynInstPtr &inst);
void nonSpecInstReady(const DynInstPtr &inst);
/** Reschedules an instruction to be re-executed. */
void reschedule(const O3DynInstPtr &inst);
void reschedule(const DynInstPtr &inst);
/** Replays all instructions that have been rescheduled by moving them to
* the ready list.
@@ -138,7 +141,7 @@ class MemDepUnit
void replay();
/** Notifies completion of an instruction. */
void completeInst(const O3DynInstPtr &inst);
void completeInst(const DynInstPtr &inst);
/** Squashes all instructions up until a given sequence number for a
* specific thread.
@@ -146,11 +149,11 @@ class MemDepUnit
void squash(const InstSeqNum &squashed_num, ThreadID tid);
/** Indicates an ordering violation between a store and a younger load. */
void violation(const O3DynInstPtr &store_inst,
const O3DynInstPtr &violating_load);
void violation(const DynInstPtr &store_inst,
const DynInstPtr &violating_load);
/** Issues the given instruction */
void issue(const O3DynInstPtr &inst);
void issue(const DynInstPtr &inst);
/** Debugging function to dump the lists of instructions. */
void dumpLists();
@@ -158,12 +161,12 @@ class MemDepUnit
private:
/** Completes a memory instruction. */
void completed(const O3DynInstPtr &inst);
void completed(const DynInstPtr &inst);
/** Wakes any dependents of a memory instruction. */
void wakeDependents(const O3DynInstPtr &inst);
void wakeDependents(const DynInstPtr &inst);
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
typedef typename std::list<DynInstPtr>::iterator ListIt;
class MemDepEntry;
@@ -177,7 +180,7 @@ class MemDepUnit
{
public:
/** Constructs a memory dependence entry. */
MemDepEntry(const O3DynInstPtr &new_inst);
MemDepEntry(const DynInstPtr &new_inst);
/** Frees any pointers. */
~MemDepEntry();
@@ -186,7 +189,7 @@ class MemDepUnit
std::string name() const { return "memdepentry"; }
/** The instruction being tracked. */
O3DynInstPtr inst;
DynInstPtr inst;
/** The iterator to the instruction's location inside the list. */
ListIt listIt;
@@ -212,7 +215,7 @@ class MemDepUnit
};
/** Finds the memory dependence entry in the hash map. */
MemDepEntryPtr &findInHash(const O3DynInstConstPtr& inst);
MemDepEntryPtr &findInHash(const DynInstConstPtr& inst);
/** Moves an entry to the ready list. */
void moveToReady(MemDepEntryPtr &ready_inst_entry);
@@ -225,10 +228,10 @@ class MemDepUnit
MemDepHash memDepHash;
/** A list of all instructions in the memory dependence unit. */
std::list<O3DynInstPtr> instList[O3MaxThreads];
std::list<DynInstPtr> instList[MaxThreads];
/** A list of all instructions that are going to be replayed. */
std::list<O3DynInstPtr> instsToReplay;
std::list<DynInstPtr> instsToReplay;
/** The memory dependence predictor. It is accessed upon new
* instructions being added to the IQ, and responds by telling
@@ -250,7 +253,7 @@ class MemDepUnit
bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); }
/** Inserts the SN of a barrier inst. to the list of tracked barriers */
void insertBarrierSN(const O3DynInstPtr &barr_inst);
void insertBarrierSN(const DynInstPtr &barr_inst);
/** Pointer to the IQ. */
InstructionQueue *iqPtr;
@@ -273,4 +276,6 @@ class MemDepUnit
} stats;
};
} // namespace o3
#endif // __CPU_O3_MEM_DEP_UNIT_HH__

View File

@@ -37,6 +37,7 @@ from m5.objects.Probe import *
class ElasticTrace(ProbeListenerObject):
type = 'ElasticTrace'
cxx_class = 'o3::ElasticTrace'
cxx_header = 'cpu/o3/probe/elastic_trace.hh'
# Trace files for the following params are created in the output directory.

View File

@@ -37,4 +37,5 @@ from m5.objects.Probe import *
class SimpleTrace(ProbeListenerObject):
type = 'SimpleTrace'
cxx_class = 'o3::SimpleTrace'
cxx_header = 'cpu/o3/probe/simple_trace.hh'

View File

@@ -45,6 +45,9 @@
#include "debug/ElasticTrace.hh"
#include "mem/packet.hh"
namespace o3
{
ElasticTrace::ElasticTrace(const ElasticTraceParams &params)
: ProbeListenerObject(params),
regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
@@ -58,7 +61,7 @@ ElasticTrace::ElasticTrace(const ElasticTraceParams &params)
traceVirtAddr(params.traceVirtAddr),
stats(this)
{
cpu = dynamic_cast<FullO3CPU *>(params.manager);
cpu = dynamic_cast<CPU *>(params.manager);
const BaseISA::RegClasses &regClasses =
cpu->getContext(0)->getIsaPtr()->regClasses();
zeroReg = regClasses.at(IntRegClass).zeroReg();
@@ -125,21 +128,21 @@ ElasticTrace::regEtraceListeners()
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
"FetchRequest", &ElasticTrace::fetchReqTrace));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
O3DynInstConstPtr>(this, "Execute",
DynInstConstPtr>(this, "Execute",
&ElasticTrace::recordExecTick));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
O3DynInstConstPtr>(this, "ToCommit",
DynInstConstPtr>(this, "ToCommit",
&ElasticTrace::recordToCommTick));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
O3DynInstConstPtr>(this, "Rename",
DynInstConstPtr>(this, "Rename",
&ElasticTrace::updateRegDep));
listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
"SquashInRename", &ElasticTrace::removeRegDepMapEntry));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
O3DynInstConstPtr>(this, "Squash",
DynInstConstPtr>(this, "Squash",
&ElasticTrace::addSquashedInst));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
O3DynInstConstPtr>(this, "Commit",
DynInstConstPtr>(this, "Commit",
&ElasticTrace::addCommittedInst));
allProbesReg = true;
}
@@ -167,7 +170,7 @@ ElasticTrace::fetchReqTrace(const RequestPtr &req)
}
void
ElasticTrace::recordExecTick(const O3DynInstConstPtr& dyn_inst)
ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
{
// In a corner case, a retired instruction is propagated backward to the
@@ -204,7 +207,7 @@ ElasticTrace::recordExecTick(const O3DynInstConstPtr& dyn_inst)
}
void
ElasticTrace::recordToCommTick(const O3DynInstConstPtr& dyn_inst)
ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
{
// If tracing has just been enabled then the instruction at this stage of
// execution is far enough that we cannot gather info about its past like
@@ -225,7 +228,7 @@ ElasticTrace::recordToCommTick(const O3DynInstConstPtr& dyn_inst)
}
void
ElasticTrace::updateRegDep(const O3DynInstConstPtr& dyn_inst)
ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
{
// Get the sequence number of the instruction
InstSeqNum seq_num = dyn_inst->seqNum;
@@ -304,7 +307,7 @@ ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
}
void
ElasticTrace::addSquashedInst(const O3DynInstConstPtr& head_inst)
ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
{
// If the squashed instruction was squashed before being processed by
// execute stage then it will not be in the temporary store. In this case
@@ -332,7 +335,7 @@ ElasticTrace::addSquashedInst(const O3DynInstConstPtr& head_inst)
}
void
ElasticTrace::addCommittedInst(const O3DynInstConstPtr& head_inst)
ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
{
DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
head_inst->seqNum);
@@ -391,7 +394,7 @@ ElasticTrace::addCommittedInst(const O3DynInstConstPtr& head_inst)
}
void
ElasticTrace::addDepTraceRecord(const O3DynInstConstPtr& head_inst,
ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
InstExecInfo* exec_info_ptr, bool commit)
{
// Create a record to assign dynamic instruction related fields.
@@ -653,7 +656,7 @@ ElasticTrace::hasCompCompleted(TraceInfo* past_record,
}
void
ElasticTrace::clearTempStoreUntil(const O3DynInstConstPtr& head_inst)
ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
{
// Clear from temp store starting with the execution info object
// corresponding to the head_inst and continue clearing by decrementing the
@@ -929,3 +932,5 @@ ElasticTrace::flushTraces()
delete dataTraceStream;
delete instTraceStream;
}
} // namespace o3

View File

@@ -61,7 +61,10 @@
#include "sim/eventq.hh"
#include "sim/probe/probe.hh"
class FullO3CPU;
namespace o3
{
class CPU;
/**
* The elastic trace is a type of probe listener and listens to probe points
@@ -130,7 +133,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void recordExecTick(const O3DynInstConstPtr& dyn_inst);
void recordExecTick(const DynInstConstPtr& dyn_inst);
/**
* Populate the timestamp field in an InstExecInfo object for an
@@ -139,7 +142,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void recordToCommTick(const O3DynInstConstPtr& dyn_inst);
void recordToCommTick(const DynInstConstPtr& dyn_inst);
/**
* Record a Read After Write physical register dependency if there has
@@ -150,7 +153,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void updateRegDep(const O3DynInstConstPtr& dyn_inst);
void updateRegDep(const DynInstConstPtr& dyn_inst);
/**
* When an instruction gets squashed the destination register mapped to it
@@ -167,14 +170,14 @@ class ElasticTrace : public ProbeListenerObject
*
* @param head_inst pointer to dynamic instruction to be squashed
*/
void addSquashedInst(const O3DynInstConstPtr& head_inst);
void addSquashedInst(const DynInstConstPtr& head_inst);
/**
* Add an instruction that is at the head of the ROB and is committed.
*
* @param head_inst pointer to dynamic instruction to be committed
*/
void addCommittedInst(const O3DynInstConstPtr& head_inst);
void addCommittedInst(const DynInstConstPtr& head_inst);
/** Event to trigger registering this listener for all probe points. */
EventFunctionWrapper regEtraceListenersEvent;
@@ -368,7 +371,7 @@ class ElasticTrace : public ProbeListenerObject
const bool traceVirtAddr;
/** Pointer to the O3CPU that is this listener's parent a.k.a. manager */
FullO3CPU *cpu;
CPU *cpu;
/**
* Add a record to the dependency trace depTrace which is a sequential
@@ -380,7 +383,7 @@ class ElasticTrace : public ProbeListenerObject
* @param exec_info_ptr Pointer to InstExecInfo for that instruction
* @param commit True if instruction is committed, false if squashed
*/
void addDepTraceRecord(const O3DynInstConstPtr& head_inst,
void addDepTraceRecord(const DynInstConstPtr& head_inst,
InstExecInfo* exec_info_ptr, bool commit);
/**
@@ -389,7 +392,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param head_inst pointer to dynamic instruction
*/
void clearTempStoreUntil(const O3DynInstConstPtr& head_inst);
void clearTempStoreUntil(const DynInstConstPtr& head_inst);
/**
* Calculate the computational delay between an instruction and a
@@ -558,4 +561,7 @@ class ElasticTrace : public ProbeListenerObject
} stats;
};
} // namespace o3
#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__

View File

@@ -41,26 +41,34 @@
#include "cpu/o3/dyn_inst.hh"
#include "debug/SimpleTrace.hh"
void SimpleTrace::traceCommit(const O3DynInstConstPtr& dynInst)
namespace o3
{
void
SimpleTrace::traceCommit(const DynInstConstPtr& dynInst)
{
DPRINTFR(SimpleTrace, "[%s]: Commit 0x%08x %s.\n", name(),
dynInst->instAddr(),
dynInst->staticInst->disassemble(dynInst->instAddr()));
}
void SimpleTrace::traceFetch(const O3DynInstConstPtr& dynInst)
void
SimpleTrace::traceFetch(const DynInstConstPtr& dynInst)
{
DPRINTFR(SimpleTrace, "[%s]: Fetch 0x%08x %s.\n", name(),
dynInst->instAddr(),
dynInst->staticInst->disassemble(dynInst->instAddr()));
}
void SimpleTrace::regProbeListeners()
void
SimpleTrace::regProbeListeners()
{
typedef ProbeListenerArg<SimpleTrace,
O3DynInstConstPtr> DynInstListener;
DynInstConstPtr> DynInstListener;
listeners.push_back(new DynInstListener(this, "Commit",
&SimpleTrace::traceCommit));
listeners.push_back(new DynInstListener(this, "Fetch",
&SimpleTrace::traceFetch));
}
} // namespace o3

View File

@@ -48,11 +48,14 @@
#include "params/SimpleTrace.hh"
#include "sim/probe/probe.hh"
namespace o3
{
class SimpleTrace : public ProbeListenerObject
{
public:
SimpleTrace(const SimpleTraceParams &params):
SimpleTrace(const SimpleTraceParams &params) :
ProbeListenerObject(params)
{
}
@@ -68,8 +71,11 @@ class SimpleTrace : public ProbeListenerObject
}
private:
void traceFetch(const O3DynInstConstPtr& dynInst);
void traceCommit(const O3DynInstConstPtr& dynInst);
void traceFetch(const DynInstConstPtr& dynInst);
void traceCommit(const DynInstConstPtr& dynInst);
};
} // namespace o3
#endif//__CPU_O3_PROBE_SIMPLE_TRACE_HH__

View File

@@ -45,6 +45,9 @@
#include "arch/generic/types.hh"
#include "cpu/o3/free_list.hh"
namespace o3
{
PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
unsigned _numPhysicalFloatRegs,
unsigned _numPhysicalVecRegs,
@@ -231,3 +234,4 @@ PhysRegFile::getTrueId(PhysRegIdPtr reg)
return nullptr;
}
} // namespace o3

View File

@@ -52,6 +52,9 @@
#include "debug/IEW.hh"
#include "enums/VecRegRenameMode.hh"
namespace o3
{
class UnifiedFreeList;
/**
@@ -361,5 +364,6 @@ class PhysRegFile
PhysRegIdPtr getTrueId(PhysRegIdPtr reg);
};
} // namespace o3
#endif //__CPU_O3_REGFILE_HH__

View File

@@ -50,9 +50,12 @@
#include "debug/Activity.hh"
#include "debug/O3PipeView.hh"
#include "debug/Rename.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
DefaultRename::DefaultRename(FullO3CPU *_cpu, const DerivO3CPUParams &params)
namespace o3
{
Rename::Rename(CPU *_cpu, const O3CPUParams &params)
: cpu(_cpu),
iewToRenameDelay(params.iewToRenameDelay),
decodeToRenameDelay(params.decodeToRenameDelay),
@@ -61,14 +64,14 @@ DefaultRename::DefaultRename(FullO3CPU *_cpu, const DerivO3CPUParams &params)
numThreads(params.numThreads),
stats(_cpu)
{
if (renameWidth > O3MaxWidth)
if (renameWidth > MaxWidth)
fatal("renameWidth (%d) is larger than compiled limit (%d),\n"
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
renameWidth, static_cast<int>(O3MaxWidth));
"\tincrease MaxWidth in src/cpu/o3/limits.hh\n",
renameWidth, static_cast<int>(MaxWidth));
// @todo: Make into a parameter.
skidBufferMax = (decodeToRenameDelay + 1) * params.decodeWidth;
for (uint32_t tid = 0; tid < O3MaxThreads; tid++) {
for (uint32_t tid = 0; tid < MaxThreads; tid++) {
renameStatus[tid] = Idle;
renameMap[tid] = nullptr;
instsInProgress[tid] = 0;
@@ -83,12 +86,12 @@ DefaultRename::DefaultRename(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
std::string
DefaultRename::name() const
Rename::name() const
{
return cpu->name() + ".rename";
}
DefaultRename::RenameStats::RenameStats(Stats::Group *parent)
Rename::RenameStats::RenameStats(Stats::Group *parent)
: Stats::Group(parent, "rename"),
ADD_STAT(squashCycles, Stats::Units::Cycle::get(),
"Number of cycles rename is squashing"),
@@ -170,16 +173,16 @@ DefaultRename::RenameStats::RenameStats(Stats::Group *parent)
}
void
DefaultRename::regProbePoints()
Rename::regProbePoints()
{
ppRename = new ProbePointArg<O3DynInstPtr>(
ppRename = new ProbePointArg<DynInstPtr>(
cpu->getProbeManager(), "Rename");
ppSquashInRename = new ProbePointArg<SeqNumRegPair>(cpu->getProbeManager(),
"SquashInRename");
}
void
DefaultRename::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
Rename::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
timeBuffer = tb_ptr;
@@ -194,7 +197,7 @@ DefaultRename::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
}
void
DefaultRename::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
Rename::setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr)
{
renameQueue = rq_ptr;
@@ -203,7 +206,7 @@ DefaultRename::setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr)
}
void
DefaultRename::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
Rename::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
decodeQueue = dq_ptr;
@@ -212,13 +215,13 @@ DefaultRename::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
}
void
DefaultRename::startupStage()
Rename::startupStage()
{
resetStage();
}
void
DefaultRename::clearStates(ThreadID tid)
Rename::clearStates(ThreadID tid)
{
renameStatus[tid] = Idle;
@@ -239,7 +242,7 @@ DefaultRename::clearStates(ThreadID tid)
}
void
DefaultRename::resetStage()
Rename::resetStage()
{
_status = Inactive;
@@ -270,33 +273,33 @@ DefaultRename::resetStage()
}
void
DefaultRename::setActiveThreads(std::list<ThreadID> *at_ptr)
Rename::setActiveThreads(std::list<ThreadID> *at_ptr)
{
activeThreads = at_ptr;
}
void
DefaultRename::setRenameMap(UnifiedRenameMap rm_ptr[])
Rename::setRenameMap(UnifiedRenameMap rm_ptr[])
{
for (ThreadID tid = 0; tid < numThreads; tid++)
renameMap[tid] = &rm_ptr[tid];
}
void
DefaultRename::setFreeList(UnifiedFreeList *fl_ptr)
Rename::setFreeList(UnifiedFreeList *fl_ptr)
{
freeList = fl_ptr;
}
void
DefaultRename::setScoreboard(Scoreboard *_scoreboard)
Rename::setScoreboard(Scoreboard *_scoreboard)
{
scoreboard = _scoreboard;
}
bool
DefaultRename::isDrained() const
Rename::isDrained() const
{
for (ThreadID tid = 0; tid < numThreads; tid++) {
if (instsInProgress[tid] != 0 ||
@@ -310,13 +313,13 @@ DefaultRename::isDrained() const
}
void
DefaultRename::takeOverFrom()
Rename::takeOverFrom()
{
resetStage();
}
void
DefaultRename::drainSanityCheck() const
Rename::drainSanityCheck() const
{
for (ThreadID tid = 0; tid < numThreads; tid++) {
assert(historyBuffer[tid].empty());
@@ -327,7 +330,7 @@ DefaultRename::drainSanityCheck() const
}
void
DefaultRename::squash(const InstSeqNum &squash_seq_num, ThreadID tid)
Rename::squash(const InstSeqNum &squash_seq_num, ThreadID tid)
{
DPRINTF(Rename, "[tid:%i] [squash sn:%llu] Squashing instructions.\n",
tid,squash_seq_num);
@@ -380,7 +383,7 @@ DefaultRename::squash(const InstSeqNum &squash_seq_num, ThreadID tid)
}
void
DefaultRename::tick()
Rename::tick()
{
wroteToTimeBuffer = false;
@@ -443,7 +446,7 @@ DefaultRename::tick()
}
void
DefaultRename::rename(bool &status_change, ThreadID tid)
Rename::rename(bool &status_change, ThreadID tid)
{
// If status is Running or idle,
// call renameInsts()
@@ -498,7 +501,7 @@ DefaultRename::rename(bool &status_change, ThreadID tid)
}
void
DefaultRename::renameInsts(ThreadID tid)
Rename::renameInsts(ThreadID tid)
{
// Instructions can be either in the skid buffer or the queue of
// instructions coming from decode, depending on the status.
@@ -593,7 +596,7 @@ DefaultRename::renameInsts(ThreadID tid)
assert(!insts_to_rename.empty());
O3DynInstPtr inst = insts_to_rename.front();
DynInstPtr inst = insts_to_rename.front();
//For all kinds of instructions, check ROB and IQ first. For load
//instructions, check LQ size and take into account the inflight loads
@@ -753,9 +756,9 @@ DefaultRename::renameInsts(ThreadID tid)
}
void
DefaultRename::skidInsert(ThreadID tid)
Rename::skidInsert(ThreadID tid)
{
O3DynInstPtr inst = NULL;
DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -785,11 +788,11 @@ DefaultRename::skidInsert(ThreadID tid)
}
void
DefaultRename::sortInsts()
Rename::sortInsts()
{
int insts_from_decode = fromDecode->size;
for (int i = 0; i < insts_from_decode; ++i) {
const O3DynInstPtr &inst = fromDecode->insts[i];
const DynInstPtr &inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
#if TRACING_ON
if (Debug::O3PipeView) {
@@ -800,7 +803,7 @@ DefaultRename::sortInsts()
}
bool
DefaultRename::skidsEmpty()
Rename::skidsEmpty()
{
std::list<ThreadID>::iterator threads = activeThreads->begin();
std::list<ThreadID>::iterator end = activeThreads->end();
@@ -816,7 +819,7 @@ DefaultRename::skidsEmpty()
}
void
DefaultRename::updateStatus()
Rename::updateStatus()
{
bool any_unblocking = false;
@@ -839,7 +842,7 @@ DefaultRename::updateStatus()
DPRINTF(Activity, "Activating stage.\n");
cpu->activateStage(FullO3CPU::RenameIdx);
cpu->activateStage(CPU::RenameIdx);
}
} else {
// If it's not unblocking, then rename will not have any internal
@@ -848,13 +851,13 @@ DefaultRename::updateStatus()
_status = Inactive;
DPRINTF(Activity, "Deactivating stage.\n");
cpu->deactivateStage(FullO3CPU::RenameIdx);
cpu->deactivateStage(CPU::RenameIdx);
}
}
}
bool
DefaultRename::block(ThreadID tid)
Rename::block(ThreadID tid)
{
DPRINTF(Rename, "[tid:%i] Blocking.\n", tid);
@@ -887,7 +890,7 @@ DefaultRename::block(ThreadID tid)
}
bool
DefaultRename::unblock(ThreadID tid)
Rename::unblock(ThreadID tid)
{
DPRINTF(Rename, "[tid:%i] Trying to unblock.\n", tid);
@@ -907,7 +910,7 @@ DefaultRename::unblock(ThreadID tid)
}
void
DefaultRename::doSquash(const InstSeqNum &squashed_seq_num, ThreadID tid)
Rename::doSquash(const InstSeqNum &squashed_seq_num, ThreadID tid)
{
auto hb_it = historyBuffer[tid].begin();
@@ -955,7 +958,7 @@ DefaultRename::doSquash(const InstSeqNum &squashed_seq_num, ThreadID tid)
}
void
DefaultRename::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid)
Rename::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid)
{
DPRINTF(Rename, "[tid:%i] Removing a committed instruction from the "
"history buffer %u (size=%i), until [sn:%llu].\n",
@@ -1004,9 +1007,9 @@ DefaultRename::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid)
}
void
DefaultRename::renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid)
Rename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
{
ThreadContext *tc = inst->tcBase();
::ThreadContext *tc = inst->tcBase();
UnifiedRenameMap *map = renameMap[tid];
unsigned num_src_regs = inst->numSrcRegs();
@@ -1070,9 +1073,9 @@ DefaultRename::renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid)
}
void
DefaultRename::renameDestRegs(const O3DynInstPtr &inst, ThreadID tid)
Rename::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
{
ThreadContext *tc = inst->tcBase();
::ThreadContext *tc = inst->tcBase();
UnifiedRenameMap *map = renameMap[tid];
unsigned num_dest_regs = inst->numDestRegs();
@@ -1123,7 +1126,7 @@ DefaultRename::renameDestRegs(const O3DynInstPtr &inst, ThreadID tid)
}
int
DefaultRename::calcFreeROBEntries(ThreadID tid)
Rename::calcFreeROBEntries(ThreadID tid)
{
int num_free = freeEntries[tid].robEntries -
(instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
@@ -1134,7 +1137,7 @@ DefaultRename::calcFreeROBEntries(ThreadID tid)
}
int
DefaultRename::calcFreeIQEntries(ThreadID tid)
Rename::calcFreeIQEntries(ThreadID tid)
{
int num_free = freeEntries[tid].iqEntries -
(instsInProgress[tid] - fromIEW->iewInfo[tid].dispatched);
@@ -1145,7 +1148,7 @@ DefaultRename::calcFreeIQEntries(ThreadID tid)
}
int
DefaultRename::calcFreeLQEntries(ThreadID tid)
Rename::calcFreeLQEntries(ThreadID tid)
{
int num_free = freeEntries[tid].lqEntries -
(loadsInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToLQ);
@@ -1158,7 +1161,7 @@ DefaultRename::calcFreeLQEntries(ThreadID tid)
}
int
DefaultRename::calcFreeSQEntries(ThreadID tid)
Rename::calcFreeSQEntries(ThreadID tid)
{
int num_free = freeEntries[tid].sqEntries -
(storesInProgress[tid] - fromIEW->iewInfo[tid].dispatchedToSQ);
@@ -1170,7 +1173,7 @@ DefaultRename::calcFreeSQEntries(ThreadID tid)
}
unsigned
DefaultRename::validInsts()
Rename::validInsts()
{
unsigned inst_count = 0;
@@ -1183,7 +1186,7 @@ DefaultRename::validInsts()
}
void
DefaultRename::readStallSignals(ThreadID tid)
Rename::readStallSignals(ThreadID tid)
{
if (fromIEW->iewBlock[tid]) {
stalls[tid].iew = true;
@@ -1196,7 +1199,7 @@ DefaultRename::readStallSignals(ThreadID tid)
}
bool
DefaultRename::checkStall(ThreadID tid)
Rename::checkStall(ThreadID tid)
{
bool ret_val = false;
@@ -1227,7 +1230,7 @@ DefaultRename::checkStall(ThreadID tid)
}
void
DefaultRename::readFreeEntries(ThreadID tid)
Rename::readFreeEntries(ThreadID tid)
{
if (fromIEW->iewInfo[tid].usedIQ)
freeEntries[tid].iqEntries = fromIEW->iewInfo[tid].freeIQEntries;
@@ -1262,7 +1265,7 @@ DefaultRename::readFreeEntries(ThreadID tid)
}
bool
DefaultRename::checkSignalsAndUpdate(ThreadID tid)
Rename::checkSignalsAndUpdate(ThreadID tid)
{
// Check if there's a squash signal, squash if there is
// Check stall signals, block if necessary.
@@ -1329,7 +1332,7 @@ DefaultRename::checkSignalsAndUpdate(ThreadID tid)
DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to "
"unblocking.\n", tid);
O3DynInstPtr serial_inst = serializeInst[tid];
DynInstPtr serial_inst = serializeInst[tid];
renameStatus[tid] = Unblocking;
@@ -1361,7 +1364,7 @@ DefaultRename::checkSignalsAndUpdate(ThreadID tid)
}
void
DefaultRename::serializeAfter(InstQueue &inst_list, ThreadID tid)
Rename::serializeAfter(InstQueue &inst_list, ThreadID tid)
{
if (inst_list.empty()) {
// Mark a bit to say that I must serialize on the next instruction.
@@ -1374,7 +1377,7 @@ DefaultRename::serializeAfter(InstQueue &inst_list, ThreadID tid)
}
void
DefaultRename::incrFullStat(const FullSource &source)
Rename::incrFullStat(const FullSource &source)
{
switch (source) {
case ROB:
@@ -1396,7 +1399,7 @@ DefaultRename::incrFullStat(const FullSource &source)
}
void
DefaultRename::dumpHistory()
Rename::dumpHistory()
{
std::list<RenameHistory>::iterator buf_it;
@@ -1419,3 +1422,5 @@ DefaultRename::dumpHistory()
}
}
}
} // namespace o3

View File

@@ -56,10 +56,13 @@
#include "cpu/timebuf.hh"
#include "sim/probe/probe.hh"
struct DerivO3CPUParams;
struct O3CPUParams;
namespace o3
{
/**
* DefaultRename handles both single threaded and SMT rename. Its
* Rename handles both single threaded and SMT rename. Its
* width is specified by the parameters; each cycle it tries to rename
* that many instructions. It holds onto the rename history of all
* instructions with destination registers, storing the
@@ -70,14 +73,14 @@ struct DerivO3CPUParams;
* and does so by stalling on the instruction until the ROB is empty
* and there are no instructions in flight to the ROB.
*/
class DefaultRename
class Rename
{
public:
// A deque is used to queue the instructions. Barrier insts must
// be added to the front of the queue, which is the only reason for
// using a deque instead of a queue. (Most other stages use a
// queue)
typedef std::deque<O3DynInstPtr> InstQueue;
typedef std::deque<DynInstPtr> InstQueue;
public:
/** Overall rename status. Used to determine if the CPU can
@@ -106,12 +109,12 @@ class DefaultRename
RenameStatus _status;
/** Per-thread status. */
ThreadStatus renameStatus[O3MaxThreads];
ThreadStatus renameStatus[MaxThreads];
/** Probe points. */
typedef std::pair<InstSeqNum, PhysRegIdPtr> SeqNumRegPair;
/** To probe when register renaming for an instruction is complete */
ProbePointArg<O3DynInstPtr> *ppRename;
ProbePointArg<DynInstPtr> *ppRename;
/**
* To probe when an instruction is squashed and the register mapping
* for it needs to be undone
@@ -119,8 +122,8 @@ class DefaultRename
ProbePointArg<SeqNumRegPair> *ppSquashInRename;
public:
/** DefaultRename constructor. */
DefaultRename(FullO3CPU *_cpu, const DerivO3CPUParams &params);
/** Rename constructor. */
Rename(CPU *_cpu, const O3CPUParams &params);
/** Returns the name of rename. */
std::string name() const;
@@ -129,30 +132,30 @@ class DefaultRename
void regProbePoints();
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr);
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setRenameQueue(TimeBuffer<O3Comm::RenameStruct> *rq_ptr);
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets pointer to time buffer coming from decode. */
void setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr);
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
/** Sets pointer to IEW stage. Used only for initialization. */
void setIEWStage(DefaultIEW *iew_stage) { iew_ptr = iew_stage; }
void setIEWStage(IEW *iew_stage) { iew_ptr = iew_stage; }
/** Sets pointer to commit stage. Used only for initialization. */
void
setCommitStage(DefaultCommit *commit_stage)
setCommitStage(Commit *commit_stage)
{
commit_ptr = commit_stage;
}
private:
/** Pointer to IEW stage. Used only for initialization. */
DefaultIEW *iew_ptr;
IEW *iew_ptr;
/** Pointer to commit stage. Used only for initialization. */
DefaultCommit *commit_ptr;
Commit *commit_ptr;
public:
/** Initializes variables for the stage. */
@@ -165,7 +168,7 @@ class DefaultRename
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to rename maps (per-thread structures). */
void setRenameMap(UnifiedRenameMap rm_ptr[O3MaxThreads]);
void setRenameMap(UnifiedRenameMap rm_ptr[MaxThreads]);
/** Sets pointer to the free list. */
void setFreeList(UnifiedFreeList *fl_ptr);
@@ -244,10 +247,10 @@ class DefaultRename
void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid);
/** Renames the source registers of an instruction. */
void renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid);
void renameSrcRegs(const DynInstPtr &inst, ThreadID tid);
/** Renames the destination registers of an instruction. */
void renameDestRegs(const O3DynInstPtr &inst, ThreadID tid);
void renameDestRegs(const DynInstPtr &inst, ThreadID tid);
/** Calculates the number of free ROB entries for a specific thread. */
int calcFreeROBEntries(ThreadID tid);
@@ -313,43 +316,43 @@ class DefaultRename
/** A per-thread list of all destination register renames, used to either
* undo rename mappings or free old physical registers.
*/
std::list<RenameHistory> historyBuffer[O3MaxThreads];
std::list<RenameHistory> historyBuffer[MaxThreads];
/** Pointer to CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get IEW's output from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's output from backwards time buffer. */
TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
TimeBuffer<O3Comm::TimeStruct>::wire toDecode;
TimeBuffer<TimeStruct>::wire toDecode;
/** Rename instruction queue. */
TimeBuffer<O3Comm::RenameStruct> *renameQueue;
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to write any information heading to IEW. */
TimeBuffer<O3Comm::RenameStruct>::wire toIEW;
TimeBuffer<RenameStruct>::wire toIEW;
/** Decode instruction queue interface. */
TimeBuffer<O3Comm::DecodeStruct> *decodeQueue;
TimeBuffer<DecodeStruct> *decodeQueue;
/** Wire to get decode's output from decode queue. */
TimeBuffer<O3Comm::DecodeStruct>::wire fromDecode;
TimeBuffer<DecodeStruct>::wire fromDecode;
/** Queue of all instructions coming from decode this cycle. */
InstQueue insts[O3MaxThreads];
InstQueue insts[MaxThreads];
/** Skid buffer between rename and decode. */
InstQueue skidBuffer[O3MaxThreads];
InstQueue skidBuffer[MaxThreads];
/** Rename map interface. */
UnifiedRenameMap *renameMap[O3MaxThreads];
UnifiedRenameMap *renameMap[MaxThreads];
/** Free list interface. */
UnifiedFreeList *freeList;
@@ -363,17 +366,17 @@ class DefaultRename
/** Count of instructions in progress that have been sent off to the IQ
* and ROB, but are not yet included in their occupancy counts.
*/
int instsInProgress[O3MaxThreads];
int instsInProgress[MaxThreads];
/** Count of Load instructions in progress that have been sent off to the
* IQ and ROB, but are not yet included in their occupancy counts.
*/
int loadsInProgress[O3MaxThreads];
int loadsInProgress[MaxThreads];
/** Count of Store instructions in progress that have been sent off to the
* IQ and ROB, but are not yet included in their occupancy counts.
*/
int storesInProgress[O3MaxThreads];
int storesInProgress[MaxThreads];
/** Variable that tracks if decode has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
@@ -394,13 +397,13 @@ class DefaultRename
/** Per-thread tracking of the number of free entries of back-end
* structures.
*/
FreeEntries freeEntries[O3MaxThreads];
FreeEntries freeEntries[MaxThreads];
/** Records if the ROB is empty. In SMT mode the ROB may be dynamically
* partitioned between threads, so the ROB must tell rename when it is
* empty.
*/
bool emptyROB[O3MaxThreads];
bool emptyROB[MaxThreads];
/** Source of possible stalls. */
struct Stalls
@@ -410,15 +413,15 @@ class DefaultRename
};
/** Tracks which stages are telling decode to stall. */
Stalls stalls[O3MaxThreads];
Stalls stalls[MaxThreads];
/** The serialize instruction that rename has stalled on. */
O3DynInstPtr serializeInst[O3MaxThreads];
DynInstPtr serializeInst[MaxThreads];
/** Records if rename needs to serialize on the next instruction for any
* thread.
*/
bool serializeOnNextInst[O3MaxThreads];
bool serializeOnNextInst[MaxThreads];
/** Delay between iew and rename, in ticks. */
int iewToRenameDelay;
@@ -530,4 +533,6 @@ class DefaultRename
} stats;
};
} // namespace o3
#endif // __CPU_O3_RENAME_HH__

View File

@@ -47,7 +47,8 @@
#include "cpu/reg_class.hh"
#include "debug/Rename.hh"
/**** SimpleRenameMap methods ****/
namespace o3
{
SimpleRenameMap::SimpleRenameMap()
: freeList(NULL), zeroReg(IntRegClass, 0)
@@ -214,3 +215,5 @@ UnifiedRenameMap::switchMode(VecMode newVecMode)
}
}
} // namespace o3

View File

@@ -54,6 +54,9 @@
#include "cpu/reg_class.hh"
#include "enums/VecRegRenameMode.hh"
namespace o3
{
/**
* Register rename map for a single class of registers (e.g., integer
* or floating point). Because the register class is implicitly
@@ -399,4 +402,6 @@ class UnifiedRenameMap
};
} // namespace o3
#endif //__CPU_O3_RENAME_MAP_HH__

View File

@@ -47,9 +47,12 @@
#include "cpu/o3/limits.hh"
#include "debug/Fetch.hh"
#include "debug/ROB.hh"
#include "params/DerivO3CPU.hh"
#include "params/O3CPU.hh"
ROB::ROB(FullO3CPU *_cpu, const DerivO3CPUParams &params)
namespace o3
{
ROB::ROB(CPU *_cpu, const O3CPUParams &params)
: robPolicy(params.smtROBPolicy),
cpu(_cpu),
numEntries(params.numROBEntries),
@@ -87,7 +90,7 @@ ROB::ROB(FullO3CPU *_cpu, const DerivO3CPUParams &params)
}
}
for (ThreadID tid = numThreads; tid < O3MaxThreads; tid++) {
for (ThreadID tid = numThreads; tid < MaxThreads; tid++) {
maxEntries[tid] = 0;
}
@@ -97,7 +100,7 @@ ROB::ROB(FullO3CPU *_cpu, const DerivO3CPUParams &params)
void
ROB::resetState()
{
for (ThreadID tid = 0; tid < O3MaxThreads; tid++) {
for (ThreadID tid = 0; tid < MaxThreads; tid++) {
threadEntries[tid] = 0;
squashIt[tid] = instList[tid].end();
squashedSeqNum[tid] = 0;
@@ -188,7 +191,7 @@ ROB::countInsts(ThreadID tid)
}
void
ROB::insertInst(const O3DynInstPtr &inst)
ROB::insertInst(const DynInstPtr &inst)
{
assert(inst);
@@ -234,7 +237,7 @@ ROB::retireHead(ThreadID tid)
// Get the head ROB instruction by copying it and remove it from the list
InstIt head_it = instList[tid].begin();
O3DynInstPtr head_inst = std::move(*head_it);
DynInstPtr head_inst = std::move(*head_it);
instList[tid].erase(head_it);
assert(head_inst->readyToCommit());
@@ -410,7 +413,7 @@ ROB::updateHead()
InstIt head_thread = instList[tid].begin();
O3DynInstPtr head_inst = (*head_thread);
DynInstPtr head_inst = (*head_thread);
assert(head_inst != 0);
@@ -492,7 +495,7 @@ ROB::squash(InstSeqNum squash_num, ThreadID tid)
}
}
const O3DynInstPtr&
const DynInstPtr&
ROB::readHeadInst(ThreadID tid)
{
if (threadEntries[tid] != 0) {
@@ -506,7 +509,7 @@ ROB::readHeadInst(ThreadID tid)
}
}
O3DynInstPtr
DynInstPtr
ROB::readTailInst(ThreadID tid)
{
InstIt tail_thread = instList[tid].end();
@@ -522,7 +525,7 @@ ROB::ROBStats::ROBStats(Stats::Group *parent)
{
}
O3DynInstPtr
DynInstPtr
ROB::findInst(ThreadID tid, InstSeqNum squash_inst)
{
for (InstIt it = instList[tid].begin(); it != instList[tid].end(); it++) {
@@ -532,3 +535,5 @@ ROB::findInst(ThreadID tid, InstSeqNum squash_inst)
}
return NULL;
}
} // namespace o3

View File

@@ -54,7 +54,12 @@
#include "cpu/reg_class.hh"
#include "enums/SMTQueuePolicy.hh"
class FullO3CPU;
struct O3CPUParams;
namespace o3
{
class CPU;
// NOTE(review): this forward declaration still uses the pre-rename parameter
// struct name. The ROB constructor declared in this header takes
// `const O3CPUParams &`, and `O3CPUParams` is forward declared separately
// before `namespace o3` opens. Because this line sits inside `namespace o3`,
// it declares `o3::DerivO3CPUParams` -- presumably an unused leftover; confirm
// nothing references it and remove it.
struct DerivO3CPUParams;
@@ -65,7 +70,7 @@ class ROB
{
public:
typedef std::pair<RegIndex, RegIndex> UnmapInfo;
typedef typename std::list<O3DynInstPtr>::iterator InstIt;
typedef typename std::list<DynInstPtr>::iterator InstIt;
/** Possible ROB statuses. */
enum Status
@@ -77,7 +82,7 @@ class ROB
private:
/** Per-thread ROB status. */
Status robStatus[O3MaxThreads];
Status robStatus[MaxThreads];
/** ROB resource sharing policy for SMT mode. */
SMTQueuePolicy robPolicy;
@@ -87,7 +92,7 @@ class ROB
* @param _cpu The cpu object pointer.
* @param params The cpu params including several ROB-specific parameters.
*/
ROB(FullO3CPU *_cpu, const DerivO3CPUParams &params);
ROB(CPU *_cpu, const O3CPUParams &params);
std::string name() const;
@@ -107,36 +112,36 @@ class ROB
* ROB for the new instruction.
* @param inst The instruction being inserted into the ROB.
*/
void insertInst(const O3DynInstPtr &inst);
void insertInst(const DynInstPtr &inst);
/** Returns pointer to the head instruction within the ROB. There is
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the head of the ROB.
*/
// O3DynInstPtr readHeadInst();
// DynInstPtr readHeadInst();
/** Returns a pointer to the head instruction of a specific thread within
* the ROB.
* @return Pointer to the DynInst that is at the head of the ROB.
*/
const O3DynInstPtr &readHeadInst(ThreadID tid);
const DynInstPtr &readHeadInst(ThreadID tid);
/** Returns a pointer to the instruction with the given sequence if it is
* in the ROB.
*/
O3DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
/** Returns pointer to the tail instruction within the ROB. There is
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the tail of the ROB.
*/
// O3DynInstPtr readTailInst();
// DynInstPtr readTailInst();
/** Returns a pointer to the tail instruction of a specific thread within
* the ROB.
* @return Pointer to the DynInst that is at the tail of the ROB.
*/
O3DynInstPtr readTailInst(ThreadID tid);
DynInstPtr readTailInst(ThreadID tid);
/** Retires the head instruction, removing it from the ROB. */
// void retireHead();
@@ -264,7 +269,7 @@ class ROB
void resetState();
/** Pointer to the CPU. */
FullO3CPU *cpu;
CPU *cpu;
/** Active Threads in CPU */
std::list<ThreadID> *activeThreads;
@@ -273,13 +278,13 @@ class ROB
unsigned numEntries;
/** Entries Per Thread */
unsigned threadEntries[O3MaxThreads];
unsigned threadEntries[MaxThreads];
/** Max Insts a Thread Can Have in the ROB */
unsigned maxEntries[O3MaxThreads];
unsigned maxEntries[MaxThreads];
/** ROB List of Instructions */
std::list<O3DynInstPtr> instList[O3MaxThreads];
std::list<DynInstPtr> instList[MaxThreads];
/** Number of instructions that can be squashed in a single cycle. */
unsigned squashWidth;
@@ -303,21 +308,21 @@ class ROB
* and after a squash.
* This will always be set to cpu->instList.end() if it is invalid.
*/
InstIt squashIt[O3MaxThreads];
InstIt squashIt[MaxThreads];
public:
/** Number of instructions in the ROB. */
int numInstsInROB;
/** Dummy instruction returned if there are no insts left. */
O3DynInstPtr dummyInst;
DynInstPtr dummyInst;
private:
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum[O3MaxThreads];
InstSeqNum squashedSeqNum[MaxThreads];
/** Is the ROB done squashing. */
bool doneSquashing[O3MaxThreads];
bool doneSquashing[MaxThreads];
/** Number of active threads. */
ThreadID numThreads;
@@ -334,4 +339,6 @@ class ROB
} stats;
};
} // namespace o3
#endif //__CPU_O3_ROB_HH__

View File

@@ -29,8 +29,13 @@
#include "cpu/o3/scoreboard.hh"
namespace o3
{
/**
 * Constructs a scoreboard.
 *
 * @param _my_name Stored in _name.
 * @param _numPhysicalRegs Stored in numPhysRegs and used as the first
 *        constructor argument of regScoreBoard.
 * @param zero_reg Stored in zeroReg.
 */
Scoreboard::Scoreboard(const std::string &_my_name, unsigned _numPhysicalRegs,
RegIndex zero_reg) :
// regScoreBoard is built from (_numPhysicalRegs, true) -- presumably one
// "ready" flag per physical register, all initially set; confirm against
// the member's declared type in scoreboard.hh.
_name(_my_name), zeroReg(zero_reg), regScoreBoard(_numPhysicalRegs, true),
numPhysRegs(_numPhysicalRegs)
{}
} // namespace o3

View File

@@ -37,6 +37,10 @@
#include "base/trace.hh"
#include "cpu/reg_class.hh"
#include "debug/Scoreboard.hh"
namespace o3
{
/**
* Implements a simple scoreboard to track which registers are
* ready. This class operates on the unified physical register space,
@@ -132,4 +136,6 @@ class Scoreboard
};
} // namespace o3
#endif

View File

@@ -33,6 +33,9 @@
#include "base/trace.hh"
#include "debug/StoreSet.hh"
namespace o3
{
StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size)
: clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size)
{
@@ -364,3 +367,5 @@ StoreSet::dump()
store_list_it++;
}
}
} // namespace o3

View File

@@ -37,9 +37,13 @@
#include "base/types.hh"
#include "cpu/inst_seq.hh"
namespace o3
{
struct ltseqnum
{
bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
bool
operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
{
return lhs > rhs;
}
@@ -156,4 +160,6 @@ class StoreSet
int memOpsPred;
};
} // namespace o3
#endif // __CPU_O3_STORE_SET_HH__

View File

@@ -45,14 +45,17 @@
#include "config/the_isa.hh"
#include "debug/O3CPU.hh"
namespace o3
{
PortProxy&
O3ThreadContext::getVirtProxy()
ThreadContext::getVirtProxy()
{
return thread->getVirtProxy();
}
void
O3ThreadContext::takeOverFrom(ThreadContext *old_context)
ThreadContext::takeOverFrom(::ThreadContext *old_context)
{
::takeOverFrom(*this, *old_context);
@@ -69,28 +72,28 @@ O3ThreadContext::takeOverFrom(ThreadContext *old_context)
}
void
O3ThreadContext::activate()
ThreadContext::activate()
{
DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
threadId());
if (thread->status() == ThreadContext::Active)
if (thread->status() == ::ThreadContext::Active)
return;
thread->lastActivate = curTick();
thread->setStatus(ThreadContext::Active);
thread->setStatus(::ThreadContext::Active);
// status() == Suspended
cpu->activateContext(thread->threadId());
}
void
O3ThreadContext::suspend()
ThreadContext::suspend()
{
DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
threadId());
if (thread->status() == ThreadContext::Suspended)
if (thread->status() == ::ThreadContext::Suspended)
return;
if (cpu->isDraining()) {
@@ -101,43 +104,43 @@ O3ThreadContext::suspend()
thread->lastActivate = curTick();
thread->lastSuspend = curTick();
thread->setStatus(ThreadContext::Suspended);
thread->setStatus(::ThreadContext::Suspended);
cpu->suspendContext(thread->threadId());
}
void
O3ThreadContext::halt()
ThreadContext::halt()
{
DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", threadId());
if (thread->status() == ThreadContext::Halting ||
thread->status() == ThreadContext::Halted)
if (thread->status() == ::ThreadContext::Halting ||
thread->status() == ::ThreadContext::Halted)
return;
// the thread is not going to halt/terminate immediately in this cycle.
// The thread will be removed after an exit trap is processed
// (e.g., after trapLatency cycles). Until then, the thread's status
// will be Halting.
thread->setStatus(ThreadContext::Halting);
thread->setStatus(::ThreadContext::Halting);
// add this thread to the exiting list to mark that it is trying to exit.
cpu->addThreadToExitingList(thread->threadId());
}
Tick
O3ThreadContext::readLastActivate()
ThreadContext::readLastActivate()
{
return thread->lastActivate;
}
Tick
O3ThreadContext::readLastSuspend()
ThreadContext::readLastSuspend()
{
return thread->lastSuspend;
}
void
O3ThreadContext::copyArchRegs(ThreadContext *tc)
ThreadContext::copyArchRegs(::ThreadContext *tc)
{
// Set vector renaming mode before copying registers
cpu->vecRenameMode(tc->getIsaPtr()->vecRegRenameMode(tc));
@@ -152,62 +155,61 @@ O3ThreadContext::copyArchRegs(ThreadContext *tc)
}
void
O3ThreadContext::clearArchRegs()
ThreadContext::clearArchRegs()
{
cpu->isa[thread->threadId()]->clear();
}
RegVal
O3ThreadContext::readIntRegFlat(RegIndex reg_idx) const
ThreadContext::readIntRegFlat(RegIndex reg_idx) const
{
return cpu->readArchIntReg(reg_idx, thread->threadId());
}
RegVal
O3ThreadContext::readFloatRegFlat(RegIndex reg_idx) const
ThreadContext::readFloatRegFlat(RegIndex reg_idx) const
{
return cpu->readArchFloatReg(reg_idx, thread->threadId());
}
const TheISA::VecRegContainer&
O3ThreadContext::readVecRegFlat(RegIndex reg_id) const
ThreadContext::readVecRegFlat(RegIndex reg_id) const
{
return cpu->readArchVecReg(reg_id, thread->threadId());
}
TheISA::VecRegContainer&
O3ThreadContext::getWritableVecRegFlat(RegIndex reg_id)
ThreadContext::getWritableVecRegFlat(RegIndex reg_id)
{
return cpu->getWritableArchVecReg(reg_id, thread->threadId());
}
const TheISA::VecElem&
O3ThreadContext::readVecElemFlat(RegIndex idx,
const ElemIndex& elemIndex) const
ThreadContext::readVecElemFlat(RegIndex idx, const ElemIndex& elemIndex) const
{
return cpu->readArchVecElem(idx, elemIndex, thread->threadId());
}
const TheISA::VecPredRegContainer&
O3ThreadContext::readVecPredRegFlat(RegIndex reg_id) const
ThreadContext::readVecPredRegFlat(RegIndex reg_id) const
{
return cpu->readArchVecPredReg(reg_id, thread->threadId());
}
TheISA::VecPredRegContainer&
O3ThreadContext::getWritableVecPredRegFlat(RegIndex reg_id)
ThreadContext::getWritableVecPredRegFlat(RegIndex reg_id)
{
return cpu->getWritableArchVecPredReg(reg_id, thread->threadId());
}
RegVal
O3ThreadContext::readCCRegFlat(RegIndex reg_idx) const
ThreadContext::readCCRegFlat(RegIndex reg_idx) const
{
return cpu->readArchCCReg(reg_idx, thread->threadId());
}
void
O3ThreadContext::setIntRegFlat(RegIndex reg_idx, RegVal val)
ThreadContext::setIntRegFlat(RegIndex reg_idx, RegVal val)
{
cpu->setArchIntReg(reg_idx, val, thread->threadId());
@@ -215,7 +217,7 @@ O3ThreadContext::setIntRegFlat(RegIndex reg_idx, RegVal val)
}
void
O3ThreadContext::setFloatRegFlat(RegIndex reg_idx, RegVal val)
ThreadContext::setFloatRegFlat(RegIndex reg_idx, RegVal val)
{
cpu->setArchFloatReg(reg_idx, val, thread->threadId());
@@ -223,7 +225,7 @@ O3ThreadContext::setFloatRegFlat(RegIndex reg_idx, RegVal val)
}
void
O3ThreadContext::setVecRegFlat(
ThreadContext::setVecRegFlat(
RegIndex reg_idx, const TheISA::VecRegContainer& val)
{
cpu->setArchVecReg(reg_idx, val, thread->threadId());
@@ -232,7 +234,7 @@ O3ThreadContext::setVecRegFlat(
}
void
O3ThreadContext::setVecElemFlat(RegIndex idx,
ThreadContext::setVecElemFlat(RegIndex idx,
const ElemIndex& elemIndex, const TheISA::VecElem& val)
{
cpu->setArchVecElem(idx, elemIndex, val, thread->threadId());
@@ -240,7 +242,7 @@ O3ThreadContext::setVecElemFlat(RegIndex idx,
}
void
O3ThreadContext::setVecPredRegFlat(RegIndex reg_idx,
ThreadContext::setVecPredRegFlat(RegIndex reg_idx,
const TheISA::VecPredRegContainer& val)
{
cpu->setArchVecPredReg(reg_idx, val, thread->threadId());
@@ -249,7 +251,7 @@ O3ThreadContext::setVecPredRegFlat(RegIndex reg_idx,
}
void
O3ThreadContext::setCCRegFlat(RegIndex reg_idx, RegVal val)
ThreadContext::setCCRegFlat(RegIndex reg_idx, RegVal val)
{
cpu->setArchCCReg(reg_idx, val, thread->threadId());
@@ -257,7 +259,7 @@ O3ThreadContext::setCCRegFlat(RegIndex reg_idx, RegVal val)
}
void
O3ThreadContext::pcState(const TheISA::PCState &val)
ThreadContext::pcState(const TheISA::PCState &val)
{
cpu->pcState(val, thread->threadId());
@@ -265,7 +267,7 @@ O3ThreadContext::pcState(const TheISA::PCState &val)
}
void
O3ThreadContext::pcStateNoRecord(const TheISA::PCState &val)
ThreadContext::pcStateNoRecord(const TheISA::PCState &val)
{
cpu->pcState(val, thread->threadId());
@@ -273,13 +275,13 @@ O3ThreadContext::pcStateNoRecord(const TheISA::PCState &val)
}
RegId
O3ThreadContext::flattenRegId(const RegId& regId) const
ThreadContext::flattenRegId(const RegId& regId) const
{
return cpu->isa[thread->threadId()]->flattenRegId(regId);
}
void
O3ThreadContext::setMiscRegNoEffect(RegIndex misc_reg, RegVal val)
ThreadContext::setMiscRegNoEffect(RegIndex misc_reg, RegVal val)
{
cpu->setMiscRegNoEffect(misc_reg, val, thread->threadId());
@@ -287,7 +289,7 @@ O3ThreadContext::setMiscRegNoEffect(RegIndex misc_reg, RegVal val)
}
void
O3ThreadContext::setMiscReg(RegIndex misc_reg, RegVal val)
ThreadContext::setMiscReg(RegIndex misc_reg, RegVal val)
{
cpu->setMiscReg(misc_reg, val, thread->threadId());
@@ -296,7 +298,7 @@ O3ThreadContext::setMiscReg(RegIndex misc_reg, RegVal val)
// hardware transactional memory
void
O3ThreadContext::htmAbortTransaction(uint64_t htmUid,
ThreadContext::htmAbortTransaction(uint64_t htmUid,
HtmFailureFaultCause cause)
{
cpu->htmSendAbortSignal(thread->threadId(), htmUid, cause);
@@ -305,13 +307,15 @@ O3ThreadContext::htmAbortTransaction(uint64_t htmUid,
}
BaseHTMCheckpointPtr&
O3ThreadContext::getHtmCheckpointPtr()
ThreadContext::getHtmCheckpointPtr()
{
return thread->htmCheckpoint;
}
void
O3ThreadContext::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt)
ThreadContext::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt)
{
thread->htmCheckpoint = std::move(new_cpt);
}
} // namespace o3

View File

@@ -46,6 +46,9 @@
#include "cpu/o3/isa_specific.hh"
#include "cpu/thread_context.hh"
namespace o3
{
/**
* Derived ThreadContext class for use with the O3CPU. It
* provides the interface for any external objects to access a
@@ -54,16 +57,16 @@
* the CPU will create an event to squash all in-flight
* instructions in order to ensure state is maintained correctly.
* It must be defined specifically for the O3CPU because
* not all architectural state is located within the O3ThreadState
* not all architectural state is located within the ThreadState
* (such as the commit PC, and registers), and specific actions
* must be taken when using this interface (such as squashing all
* in-flight instructions when doing a write to this interface).
*/
class O3ThreadContext : public ThreadContext
class ThreadContext : public ::ThreadContext
{
public:
/** Pointer to the CPU. */
FullO3CPU *cpu;
CPU *cpu;
bool
schedule(PCEvent *e) override
@@ -93,7 +96,7 @@ class O3ThreadContext : public ThreadContext
}
/** Pointer to the thread state that this TC corresponds to. */
O3ThreadState *thread;
ThreadState *thread;
/** Returns a pointer to the MMU. */
BaseMMU *getMMUPtr() override { return cpu->mmu; }
@@ -142,7 +145,7 @@ class O3ThreadContext : public ThreadContext
PortProxy &getVirtProxy() override;
void
initMemProxies(ThreadContext *tc) override
initMemProxies(::ThreadContext *tc) override
{
thread->initMemProxies(tc);
}
@@ -167,7 +170,7 @@ class O3ThreadContext : public ThreadContext
void halt() override;
/** Takes over execution of a thread from another CPU. */
void takeOverFrom(ThreadContext *old_context) override;
void takeOverFrom(::ThreadContext *old_context) override;
/** Reads the last tick that this thread was activated on. */
Tick readLastActivate() override;
@@ -175,7 +178,7 @@ class O3ThreadContext : public ThreadContext
Tick readLastSuspend() override;
/** Copies the architectural registers from another TC into this TC. */
void copyArchRegs(ThreadContext *tc) override;
void copyArchRegs(::ThreadContext *tc) override;
/** Resets all architectural registers to 0. */
void clearArchRegs() override;
@@ -401,4 +404,6 @@ class O3ThreadContext : public ThreadContext
void setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt) override;
};
} // namespace o3
#endif

View File

@@ -42,30 +42,35 @@
#include "cpu/o3/cpu.hh"
O3ThreadState::O3ThreadState(FullO3CPU *_cpu, int _thread_num,
Process *_process) : ThreadState(_cpu, _thread_num, _process),
namespace o3
{
ThreadState::ThreadState(CPU *_cpu, int _thread_num, Process *_process) :
::ThreadState(_cpu, _thread_num, _process),
comInstEventQueue("instruction-based event queue")
{}
void
O3ThreadState::serialize(CheckpointOut &cp) const
ThreadState::serialize(CheckpointOut &cp) const
{
ThreadState::serialize(cp);
::ThreadState::serialize(cp);
// Use the ThreadContext serialization helper to serialize the
// TC.
::serialize(*tc, cp);
}
void
O3ThreadState::unserialize(CheckpointIn &cp)
ThreadState::unserialize(CheckpointIn &cp)
{
// Prevent squashing - we don't have any instructions in
// flight that we need to squash since we just instantiated a
// clean system.
noSquashFromTC = true;
ThreadState::unserialize(cp);
::ThreadState::unserialize(cp);
// Use the ThreadContext serialization helper to unserialize
// the TC.
::unserialize(*tc, cp);
noSquashFromTC = false;
}
} // namespace o3

View File

@@ -47,7 +47,11 @@
#include "cpu/thread_state.hh"
class Process;
class FullO3CPU;
namespace o3
{
class CPU;
/**
* Class that has various thread state, such as the status, the
@@ -56,7 +60,7 @@ class FullO3CPU;
* pointer, etc. It also handles anything related to a specific
* thread's process, such as syscalls and checking valid addresses.
*/
class O3ThreadState : public ThreadState
class ThreadState : public ::ThreadState
{
public:
PCEventQueue pcEventQueue;
@@ -84,16 +88,18 @@ class O3ThreadState : public ThreadState
/** Pointer to the hardware transactional memory checkpoint. */
std::unique_ptr<BaseHTMCheckpoint> htmCheckpoint;
O3ThreadState(FullO3CPU *_cpu, int _thread_num, Process *_process);
ThreadState(CPU *_cpu, int _thread_num, Process *_process);
void serialize(CheckpointOut &cp) const override;
void unserialize(CheckpointIn &cp) override;
/** Pointer to the ThreadContext of this thread. */
ThreadContext *tc = nullptr;
::ThreadContext *tc = nullptr;
/** Returns a pointer to the TC of this thread. */
ThreadContext *getTC() { return tc; }
::ThreadContext *getTC() { return tc; }
};
} // namespace o3
#endif // __CPU_O3_THREAD_STATE_HH__