cpu: Drop the DynInstPtr types from O3CPUImpl.

Aside from basic code editing, this also moves some methods from the
.hh files to the _impl.hh files. It also changes the Checker CPU
template to take the DynInstPtr type directly instead of through Impl
since that was the only type it used anyway. Finally it sets up a header
file which predeclares the O3DynInstPtr and O3DynInstConstPtr types so
they can be used without having to also include the BaseO3DynInst class
definition to break circular dependencies.

Change-Id: I5ca6af38ec13e6e820abcdb3748412e4f7fc1c78
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42101
Reviewed-by: Nathanael Premillieu <nathanael.premillieu@huawei.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2021-03-01 21:37:33 -08:00
parent 2caf2509a2
commit 2db8b308e0
34 changed files with 912 additions and 847 deletions

View File

@@ -51,6 +51,7 @@
#include "cpu/base.hh"
#include "cpu/exec_context.hh"
#include "cpu/inst_res.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/pc_event.hh"
#include "cpu/simple_thread.hh"
#include "cpu/static_inst.hh"
@@ -559,12 +560,9 @@ class CheckerCPU : public BaseCPU, public ExecContext
* template instantiations of the Checker must be placed at the bottom
* of checker/cpu.cc.
*/
template <class Impl>
template <class DynInstPtr>
class Checker : public CheckerCPU
{
private:
typedef typename Impl::DynInstPtr DynInstPtr;
public:
Checker(const Params &p)
: CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL)

View File

@@ -59,9 +59,9 @@
#include "sim/sim_object.hh"
#include "sim/stats.hh"
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::advancePC(const Fault &fault)
Checker<DynInstPtr>::advancePC(const Fault &fault)
{
if (fault != NoFault) {
curMacroStaticInst = nullStaticInstPtr;
@@ -80,9 +80,9 @@ Checker<Impl>::advancePC(const Fault &fault)
}
//////////////////////////////////////////////////
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::handlePendingInt()
Checker<DynInstPtr>::handlePendingInt()
{
DPRINTF(Checker, "IRQ detected at PC: %s with %d insts in buffer\n",
thread->pcState(), instList.size());
@@ -114,9 +114,9 @@ Checker<Impl>::handlePendingInt()
curMacroStaticInst = nullStaticInstPtr;
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::verify(const DynInstPtr &completed_inst)
Checker<DynInstPtr>::verify(const DynInstPtr &completed_inst)
{
DynInstPtr inst;
@@ -428,22 +428,19 @@ Checker<Impl>::verify(const DynInstPtr &completed_inst)
unverifiedInst = NULL;
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::switchOut()
Checker<DynInstPtr>::switchOut()
{
instList.clear();
}
template <class Impl>
void
Checker<Impl>::takeOverFrom(BaseCPU *oldCPU)
{
}
template <class DynInstPtr>
void Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) {}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::validateInst(const DynInstPtr &inst)
Checker<DynInstPtr>::validateInst(const DynInstPtr &inst)
{
if (inst->instAddr() != thread->instAddr()) {
warn("%lli: PCs do not match! Inst: %s, checker: %s",
@@ -462,9 +459,9 @@ Checker<Impl>::validateInst(const DynInstPtr &inst)
}
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::validateExecution(const DynInstPtr &inst)
Checker<DynInstPtr>::validateExecution(const DynInstPtr &inst)
{
InstResult checker_val;
InstResult inst_val;
@@ -555,9 +552,9 @@ Checker<Impl>::validateExecution(const DynInstPtr &inst)
// This function is weird, if it is called it means the Checker and
// O3 have diverged, so panic is called for now. It may be useful
// to resynch states and continue if the divergence is a false positive
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::validateState()
Checker<DynInstPtr>::validateState()
{
if (updateThisCycle) {
// Change this back to warn if divergences end up being false positives
@@ -580,10 +577,10 @@ Checker<Impl>::validateState()
}
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::copyResult(const DynInstPtr &inst,
const InstResult& mismatch_val, int start_idx)
Checker<DynInstPtr>::copyResult(
const DynInstPtr &inst, const InstResult& mismatch_val, int start_idx)
{
// We've already popped one dest off the queue,
// so do the fix-up then start with the next dest reg;
@@ -657,9 +654,9 @@ Checker<Impl>::copyResult(const DynInstPtr &inst,
}
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::dumpAndExit(const DynInstPtr &inst)
Checker<DynInstPtr>::dumpAndExit(const DynInstPtr &inst)
{
cprintf("Error detected, instruction information:\n");
cprintf("PC:%s, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n"
@@ -673,9 +670,9 @@ Checker<Impl>::dumpAndExit(const DynInstPtr &inst)
CheckerCPU::dumpAndExit();
}
template <class Impl>
template <class DynInstPtr>
void
Checker<Impl>::dumpInsts()
Checker<DynInstPtr>::dumpInsts()
{
int num = 0;

View File

@@ -43,4 +43,4 @@
#include "cpu/checker/cpu_impl.hh"
template
class Checker<O3CPUImpl>;
class Checker<O3DynInstPtr>;

View File

@@ -48,10 +48,10 @@
/**
* Specific non-templated derived class used for SimObject configuration.
*/
class O3Checker : public Checker<O3CPUImpl>
class O3Checker : public Checker<O3DynInstPtr>
{
public:
O3Checker(const Params &p) : Checker<O3CPUImpl>(p)
O3Checker(const Params &p) : Checker<O3DynInstPtr>(p)
{
// The checker should check all instructions executed by the main
// cpu and therefore any parameters for early exit don't make much

View File

@@ -47,6 +47,7 @@
#include "arch/types.hh"
#include "base/types.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/limits.hh"
#include "sim/faults.hh"
@@ -54,11 +55,9 @@
template<class Impl>
struct DefaultFetchDefaultDecode
{
typedef typename Impl::DynInstPtr DynInstPtr;
int size;
DynInstPtr insts[O3MaxWidth];
O3DynInstPtr insts[O3MaxWidth];
Fault fetchFault;
InstSeqNum fetchFaultSN;
bool clearFetchFault;
@@ -68,34 +67,28 @@ struct DefaultFetchDefaultDecode
template<class Impl>
struct DefaultDecodeDefaultRename
{
typedef typename Impl::DynInstPtr DynInstPtr;
int size;
DynInstPtr insts[O3MaxWidth];
O3DynInstPtr insts[O3MaxWidth];
};
/** Struct that defines the information passed from rename to IEW. */
template<class Impl>
struct DefaultRenameDefaultIEW
{
typedef typename Impl::DynInstPtr DynInstPtr;
int size;
DynInstPtr insts[O3MaxWidth];
O3DynInstPtr insts[O3MaxWidth];
};
/** Struct that defines the information passed from IEW to commit. */
template<class Impl>
struct DefaultIEWDefaultCommit
{
typedef typename Impl::DynInstPtr DynInstPtr;
int size;
DynInstPtr insts[O3MaxWidth];
DynInstPtr mispredictInst[O3MaxThreads];
O3DynInstPtr insts[O3MaxWidth];
O3DynInstPtr mispredictInst[O3MaxThreads];
Addr mispredPC[O3MaxThreads];
InstSeqNum squashedSeqNum[O3MaxThreads];
TheISA::PCState pc[O3MaxThreads];
@@ -109,23 +102,20 @@ struct DefaultIEWDefaultCommit
template<class Impl>
struct IssueStruct
{
typedef typename Impl::DynInstPtr DynInstPtr;
int size;
DynInstPtr insts[O3MaxWidth];
O3DynInstPtr insts[O3MaxWidth];
};
/** Struct that defines all backwards communication. */
template<class Impl>
struct TimeBufStruct
{
typedef typename Impl::DynInstPtr DynInstPtr;
struct decodeComm
struct DecodeComm
{
TheISA::PCState nextPC;
DynInstPtr mispredictInst;
DynInstPtr squashInst;
O3DynInstPtr mispredictInst;
O3DynInstPtr squashInst;
InstSeqNum doneSeqNum;
Addr mispredPC;
uint64_t branchAddr;
@@ -136,15 +126,13 @@ struct TimeBufStruct
bool branchTaken;
};
decodeComm decodeInfo[O3MaxThreads];
DecodeComm decodeInfo[O3MaxThreads];
struct renameComm
{
};
struct RenameComm {};
renameComm renameInfo[O3MaxThreads];
RenameComm renameInfo[O3MaxThreads];
struct iewComm
struct IewComm
{
// Also eventually include skid buffer space.
unsigned freeIQEntries;
@@ -161,9 +149,9 @@ struct TimeBufStruct
bool usedLSQ;
};
iewComm iewInfo[O3MaxThreads];
IewComm iewInfo[O3MaxThreads];
struct commitComm
struct CommitComm
{
/////////////////////////////////////////////////////////////////////
// This code has been re-structured for better packing of variables
@@ -184,14 +172,14 @@ struct TimeBufStruct
/// Provide fetch the instruction that mispredicted, if this
/// pointer is non-null a misprediction occurred
DynInstPtr mispredictInst; // *F
O3DynInstPtr mispredictInst; // *F
/// Instruction that caused a non-mispredict squash
DynInstPtr squashInst; // *F
O3DynInstPtr squashInst; // *F
/// Hack for now to send back a strictly ordered access to the
/// IEW stage.
DynInstPtr strictlyOrderedLoad; // *I
O3DynInstPtr strictlyOrderedLoad; // *I
/// Communication specifically to the IQ to tell the IQ that it can
/// schedule a non-speculative instruction.
@@ -227,7 +215,7 @@ struct TimeBufStruct
};
commitComm commitInfo[O3MaxThreads];
CommitComm commitInfo[O3MaxThreads];
bool decodeBlock[O3MaxThreads];
bool decodeUnblock[O3MaxThreads];

View File

@@ -46,6 +46,7 @@
#include "base/statistics.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/rename_map.hh"
@@ -87,7 +88,6 @@ class DefaultCommit
public:
// Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::TimeStruct TimeStruct;
typedef typename Impl::FetchStruct FetchStruct;
typedef typename Impl::IEWStruct IEWStruct;
@@ -126,10 +126,10 @@ class DefaultCommit
CommitPolicy commitPolicy;
/** Probe Points. */
ProbePointArg<DynInstPtr> *ppCommit;
ProbePointArg<DynInstPtr> *ppCommitStall;
ProbePointArg<O3DynInstPtr> *ppCommit;
ProbePointArg<O3DynInstPtr> *ppCommitStall;
/** To probe when an instruction is squashed */
ProbePointArg<DynInstPtr> *ppSquash;
ProbePointArg<O3DynInstPtr> *ppSquash;
/** Mark the thread as processing a trap. */
void processTrapEvent(ThreadID tid);
@@ -277,7 +277,7 @@ class DefaultCommit
* @param tid ID of the thread to squash.
* @param head_inst Instruction that requested the squash.
*/
void squashAfter(ThreadID tid, const DynInstPtr &head_inst);
void squashAfter(ThreadID tid, const O3DynInstPtr &head_inst);
/** Handles processing an interrupt. */
void handleInterrupt();
@@ -291,7 +291,7 @@ class DefaultCommit
/** Tries to commit the head ROB instruction passed in.
* @param head_inst The instruction to be committed.
*/
bool commitHead(const DynInstPtr &head_inst, unsigned inst_num);
bool commitHead(const O3DynInstPtr &head_inst, unsigned inst_num);
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
@@ -385,7 +385,7 @@ class DefaultCommit
* that caused a squash since this needs to be passed to the fetch
* stage once squashing starts.
*/
DynInstPtr squashAfterInst[O3MaxThreads];
O3DynInstPtr squashAfterInst[O3MaxThreads];
/** Priority List used for Commit Policy */
std::list<ThreadID> priority_list;
@@ -472,7 +472,7 @@ class DefaultCommit
bool avoidQuiesceLiveLock;
/** Updates commit stats based on this instruction. */
void updateComInstStats(const DynInstPtr &inst);
void updateComInstStats(const O3DynInstPtr &inst);
// HTM
int htmStarts[O3MaxThreads];

View File

@@ -54,6 +54,7 @@
#include "cpu/exetrace.hh"
#include "cpu/null_static_inst.hh"
#include "cpu/o3/commit.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/thread_state.hh"
#include "cpu/timebuf.hh"
@@ -140,9 +141,12 @@ template <class Impl>
void
DefaultCommit<Impl>::regProbePoints()
{
ppCommit = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Commit");
ppCommitStall = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "CommitStall");
ppSquash = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Squash");
ppCommit = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Commit");
ppCommitStall = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "CommitStall");
ppSquash = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Squash");
}
template <class Impl>
@@ -653,7 +657,7 @@ DefaultCommit<Impl>::squashFromSquashAfter(ThreadID tid)
template <class Impl>
void
DefaultCommit<Impl>::squashAfter(ThreadID tid, const DynInstPtr &head_inst)
DefaultCommit<Impl>::squashAfter(ThreadID tid, const O3DynInstPtr &head_inst)
{
DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%llu]\n",
tid, head_inst->seqNum);
@@ -713,14 +717,14 @@ DefaultCommit<Impl>::tick()
// will be active.
_nextStatus = Active;
GEM5_VAR_USED const DynInstPtr &inst = rob->readHeadInst(tid);
GEM5_VAR_USED const O3DynInstPtr &inst = rob->readHeadInst(tid);
DPRINTF(Commit,"[tid:%i] Instruction [sn:%llu] PC %s is head of"
" ROB and ready to commit\n",
tid, inst->seqNum, inst->pcState());
} else if (!rob->isEmpty(tid)) {
const DynInstPtr &inst = rob->readHeadInst(tid);
const O3DynInstPtr &inst = rob->readHeadInst(tid);
ppCommitStall->notify(inst);
@@ -1001,7 +1005,7 @@ DefaultCommit<Impl>::commitInsts()
unsigned num_committed = 0;
DynInstPtr head_inst;
O3DynInstPtr head_inst;
// Commit as many instructions as possible until the commit bandwidth
// limit is reached, or it becomes impossible to commit any more.
@@ -1192,7 +1196,8 @@ DefaultCommit<Impl>::commitInsts()
template <class Impl>
bool
DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
DefaultCommit<Impl>::commitHead(
const O3DynInstPtr &head_inst, unsigned inst_num)
{
assert(head_inst);
@@ -1391,7 +1396,7 @@ DefaultCommit<Impl>::getInsts()
int insts_to_process = std::min((int)renameWidth, fromRename->size);
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
const DynInstPtr &inst = fromRename->insts[inst_num];
const O3DynInstPtr &inst = fromRename->insts[inst_num];
ThreadID tid = inst->threadNumber;
if (!inst->isSquashed() &&
@@ -1438,7 +1443,7 @@ DefaultCommit<Impl>::markCompletedInsts()
template <class Impl>
void
DefaultCommit<Impl>::updateComInstStats(const DynInstPtr &inst)
DefaultCommit<Impl>::updateComInstStats(const O3DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -1583,7 +1588,7 @@ DefaultCommit<Impl>::oldestReady()
if (rob->isHeadReady(tid)) {
const DynInstPtr &head_inst = rob->readHeadInst(tid);
const O3DynInstPtr &head_inst = rob->readHeadInst(tid);
if (first) {
oldest = tid;

View File

@@ -136,7 +136,7 @@ FullO3CPU<Impl>::FullO3CPU(const DerivO3CPUParams &params)
if (params.checker) {
BaseCPU *temp_checker = params.checker;
checker = dynamic_cast<Checker<Impl> *>(temp_checker);
checker = dynamic_cast<Checker<O3DynInstPtr> *>(temp_checker);
checker->setIcachePort(&this->fetch.getInstPort());
checker->setSystem(params.system);
} else {
@@ -378,8 +378,11 @@ FullO3CPU<Impl>::regProbePoints()
{
BaseCPU::regProbePoints();
ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete");
ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete");
ppInstAccessComplete = new ProbePointArg<PacketPtr>(
getProbeManager(), "InstAccessComplete");
ppDataAccessComplete = new ProbePointArg<
std::pair<O3DynInstPtr, PacketPtr>>(
getProbeManager(), "DataAccessComplete");
fetch.regProbePoints();
rename.regProbePoints();
@@ -1501,7 +1504,7 @@ FullO3CPU<Impl>::squashFromTC(ThreadID tid)
template <class Impl>
typename FullO3CPU<Impl>::ListIt
FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
FullO3CPU<Impl>::addInst(const O3DynInstPtr &inst)
{
instList.push_back(inst);
@@ -1510,7 +1513,7 @@ FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
template <class Impl>
void
FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
FullO3CPU<Impl>::instDone(ThreadID tid, const O3DynInstPtr &inst)
{
// Keep an instruction count.
if (!inst->isMicroop() || inst->isLastMicroop()) {
@@ -1530,7 +1533,7 @@ FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
template <class Impl>
void
FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst)
FullO3CPU<Impl>::removeFrontInst(const O3DynInstPtr &inst)
{
DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s "
"[sn:%lli]\n",
@@ -1686,7 +1689,7 @@ FullO3CPU<Impl>::dumpInsts()
/*
template <class Impl>
void
FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst)
FullO3CPU<Impl>::wakeDependents(const O3DynInstPtr &inst)
{
iew.wakeDependents(inst);
}

View File

@@ -56,6 +56,7 @@
#include "cpu/o3/comm.hh"
#include "cpu/o3/commit.hh"
#include "cpu/o3/decode.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/fetch.hh"
#include "cpu/o3/free_list.hh"
#include "cpu/o3/iew.hh"
@@ -100,13 +101,12 @@ class FullO3CPU : public BaseO3CPU
{
public:
// Typedefs from the Impl here.
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
typedef typename std::list<DynInstPtr>::iterator ListIt;
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
friend class O3ThreadContext<Impl>;
@@ -184,7 +184,7 @@ class FullO3CPU : public BaseO3CPU
~FullO3CPU();
ProbePointArg<PacketPtr> *ppInstAccessComplete;
ProbePointArg<std::pair<DynInstPtr, PacketPtr> > *ppDataAccessComplete;
ProbePointArg<std::pair<O3DynInstPtr, PacketPtr> > *ppDataAccessComplete;
/** Register probe points. */
void regProbePoints() override;
@@ -439,15 +439,15 @@ class FullO3CPU : public BaseO3CPU
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
*/
ListIt addInst(const DynInstPtr &inst);
ListIt addInst(const O3DynInstPtr &inst);
/** Function to tell the CPU that an instruction has completed. */
void instDone(ThreadID tid, const DynInstPtr &inst);
void instDone(ThreadID tid, const O3DynInstPtr &inst);
/** Remove an instruction from the front end of the list. There's
* no restriction on location of the instruction.
*/
void removeFrontInst(const DynInstPtr &inst);
void removeFrontInst(const O3DynInstPtr &inst);
/** Remove all instructions that are not currently in the ROB.
* There's also an option to not squash delay slot instructions.*/
@@ -472,7 +472,7 @@ class FullO3CPU : public BaseO3CPU
#endif
/** List of all the instructions in flight. */
std::list<DynInstPtr> instList;
std::list<O3DynInstPtr> instList;
/** List of all the instructions that will be removed at the end of this
* cycle.
@@ -624,7 +624,7 @@ class FullO3CPU : public BaseO3CPU
* instruction results at run time. This can be set to NULL if it
* is not being used.
*/
Checker<Impl> *checker;
Checker<O3DynInstPtr> *checker;
/** Pointer to the system. */
System *system;
@@ -648,7 +648,7 @@ class FullO3CPU : public BaseO3CPU
std::vector<ThreadID> tids;
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op = nullptr,
const std::vector<bool>& byte_enable =

View File

@@ -44,6 +44,7 @@
#include <queue>
#include "base/statistics.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/limits.hh"
#include "cpu/timebuf.hh"
@@ -62,7 +63,6 @@ class DefaultDecode
private:
// Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::FetchStruct FetchStruct;
typedef typename Impl::DecodeStruct DecodeStruct;
typedef typename Impl::TimeStruct TimeStruct;
@@ -193,7 +193,7 @@ class DefaultDecode
/** Squashes if there is a PC-relative branch that was predicted
* incorrectly. Sends squash information back to fetch.
*/
void squash(const DynInstPtr &inst, ThreadID tid);
void squash(const O3DynInstPtr &inst, ThreadID tid);
public:
/** Squashes due to commit signalling a squash. Changes status to
@@ -235,10 +235,10 @@ class DefaultDecode
typename TimeBuffer<FetchStruct>::wire fromFetch;
/** Queue of all instructions coming from fetch this cycle. */
std::queue<DynInstPtr> insts[O3MaxThreads];
std::queue<O3DynInstPtr> insts[O3MaxThreads];
/** Skid buffer between fetch and decode. */
std::queue<DynInstPtr> skidBuffer[O3MaxThreads];
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
/** Variable that tracks if decode has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
@@ -285,7 +285,7 @@ class DefaultDecode
Addr bdelayDoneSeqNum[O3MaxThreads];
/** Instruction used for squashing branch (used for MIPS)*/
DynInstPtr squashInst[O3MaxThreads];
O3DynInstPtr squashInst[O3MaxThreads];
/** Tells when there is a pending delay slot inst. to send
* to rename. If there is, then wait squash after the next

View File

@@ -46,6 +46,7 @@
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/decode.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/limits.hh"
#include "debug/Activity.hh"
#include "debug/Decode.hh"
@@ -293,7 +294,7 @@ DefaultDecode<Impl>::unblock(ThreadID tid)
template<class Impl>
void
DefaultDecode<Impl>::squash(const DynInstPtr &inst, ThreadID tid)
DefaultDecode<Impl>::squash(const O3DynInstPtr &inst, ThreadID tid)
{
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
"prediction detected at decode.\n", tid, inst->seqNum);
@@ -395,7 +396,7 @@ template<class Impl>
void
DefaultDecode<Impl>::skidInsert(ThreadID tid)
{
DynInstPtr inst = NULL;
O3DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -655,7 +656,7 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
++stats.runCycles;
}
std::queue<DynInstPtr>
std::queue<O3DynInstPtr>
&insts_to_decode = decodeStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
@@ -664,7 +665,7 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
while (insts_available > 0 && toRenameIndex < decodeWidth) {
assert(!insts_to_decode.empty());
DynInstPtr inst = std::move(insts_to_decode.front());
O3DynInstPtr inst = std::move(insts_to_decode.front());
insts_to_decode.pop();

View File

@@ -57,6 +57,7 @@
#include "cpu/inst_res.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/lsq_unit.hh"
#include "cpu/op_class.hh"
@@ -67,10 +68,6 @@
class Packet;
class BaseO3DynInst;
using O3DynInstPtr = RefCountingPtr<BaseO3DynInst>;
class BaseO3DynInst : public ExecContext, public RefCounted
{
public:

View File

@@ -0,0 +1,52 @@
/*
* Copyright (c) 2010, 2016 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_O3_DYN_INST_PTR_HH__
#define __CPU_O3_DYN_INST_PTR_HH__
#include "base/refcnt.hh"
class BaseO3DynInst;
using O3DynInstPtr = RefCountingPtr<BaseO3DynInst>;
using O3DynInstConstPtr = RefCountingPtr<const BaseO3DynInst>;
#endif // __CPU_O3_DYN_INST_PTR_HH__

View File

@@ -44,6 +44,7 @@
#include "arch/decoder.hh"
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/limits.hh"
#include "cpu/pc_event.hh"
#include "cpu/pred/bpred_unit.hh"
@@ -72,8 +73,6 @@ class DefaultFetch
{
public:
/** Typedefs from Impl. */
typedef typename Impl::DynInst DynInst;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::FetchStruct FetchStruct;
typedef typename Impl::TimeStruct TimeStruct;
@@ -207,7 +206,7 @@ class DefaultFetch
std::list<ThreadID> priorityList;
/** Probe points. */
ProbePointArg<DynInstPtr> *ppFetch;
ProbePointArg<O3DynInstPtr> *ppFetch;
/** To probe when a fetch request is successfully sent. */
ProbePointArg<RequestPtr> *ppFetchRequestSent;
@@ -294,7 +293,7 @@ class DefaultFetch
* @param next_NPC Used for ISAs which use delay slots.
* @return Whether or not a branch was predicted as taken.
*/
bool lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &pc);
bool lookupAndUpdateNextPC(const O3DynInstPtr &inst, TheISA::PCState &pc);
/**
* Fetches the cache line that contains the fetch PC. Returns any
@@ -321,14 +320,14 @@ class DefaultFetch
/** Squashes a specific thread and resets the PC. */
inline void doSquash(const TheISA::PCState &newPC,
const DynInstPtr squashInst, ThreadID tid);
const O3DynInstPtr squashInst, ThreadID tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode
* that should be squashed.
*/
void squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst,
const O3DynInstPtr squashInst,
const InstSeqNum seq_num, ThreadID tid);
/** Checks if a thread is stalled. */
@@ -344,7 +343,7 @@ class DefaultFetch
* squash should be the commit stage.
*/
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
DynInstPtr squashInst, ThreadID tid);
O3DynInstPtr squashInst, ThreadID tid);
/** Ticks the fetch stage, processing all inputs signals and fetching
* as many instructions as possible.
@@ -375,9 +374,9 @@ class DefaultFetch
RequestPort &getInstPort() { return icachePort; }
private:
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace);
O3DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace);
/** Returns the appropriate thread to fetch, given the fetch policy. */
ThreadID getFetchingThread();
@@ -505,7 +504,7 @@ class DefaultFetch
unsigned fetchQueueSize;
/** Queue of fetched instructions. Per-thread to prevent HoL blocking. */
std::deque<DynInstPtr> fetchQueue[O3MaxThreads];
std::deque<O3DynInstPtr> fetchQueue[O3MaxThreads];
/** Whether or not the fetch buffer data is valid. */
bool fetchBufferValid[O3MaxThreads];

View File

@@ -150,7 +150,7 @@ template <class Impl>
void
DefaultFetch<Impl>::regProbePoints()
{
ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
ppFetch = new ProbePointArg<O3DynInstPtr>(cpu->getProbeManager(), "Fetch");
ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
"FetchRequest");
@@ -526,7 +526,7 @@ DefaultFetch<Impl>::deactivateThread(ThreadID tid)
template <class Impl>
bool
DefaultFetch<Impl>::lookupAndUpdateNextPC(
const DynInstPtr &inst, TheISA::PCState &nextPC)
const O3DynInstPtr &inst, TheISA::PCState &nextPC)
{
// Do branch prediction check here.
// A bit of a misnomer...next_PC is actually the current PC until
@@ -706,7 +706,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
// We will use a nop in order to carry the fault.
DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
fetchPC, fetchPC, false);
instruction->setNotAnInst();
@@ -729,7 +729,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
template <class Impl>
inline void
DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
const DynInstPtr squashInst, ThreadID tid)
const O3DynInstPtr squashInst, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
tid, newPC);
@@ -781,7 +781,7 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
template<class Impl>
void
DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst,
const O3DynInstPtr squashInst,
const InstSeqNum seq_num, ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
@@ -851,7 +851,7 @@ DefaultFetch<Impl>::updateFetchStatus()
template <class Impl>
void
DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
const InstSeqNum seq_num, DynInstPtr squashInst,
const InstSeqNum seq_num, O3DynInstPtr squashInst,
ThreadID tid)
{
DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
@@ -1070,7 +1070,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
}
template<class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace)
@@ -1079,8 +1079,8 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
InstSeqNum seq = cpu->getAndIncrementInstSeq();
// Create a new DynInst from the instruction fetched.
DynInstPtr instruction =
new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
O3DynInstPtr instruction =
new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
instruction->setTid(tid);
instruction->setThreadState(cpu->thread[tid]);
@@ -1297,7 +1297,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
newMacro |= staticInst->isLastMicroop();
}
DynInstPtr instruction =
O3DynInstPtr instruction =
buildInst(tid, staticInst, curMacroop,
thisPC, nextPC, true);

View File

@@ -46,6 +46,7 @@
#include "base/statistics.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/lsq.hh"
@@ -81,7 +82,6 @@ class DefaultIEW
{
private:
//Typedefs from Impl
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::TimeStruct TimeStruct;
typedef typename Impl::IEWStruct IEWStruct;
@@ -120,12 +120,12 @@ class DefaultIEW
StageStatus wbStatus;
/** Probe points. */
ProbePointArg<DynInstPtr> *ppMispredict;
ProbePointArg<DynInstPtr> *ppDispatch;
ProbePointArg<O3DynInstPtr> *ppMispredict;
ProbePointArg<O3DynInstPtr> *ppDispatch;
/** To probe when instruction execution begins. */
ProbePointArg<DynInstPtr> *ppExecute;
ProbePointArg<O3DynInstPtr> *ppExecute;
/** To probe when instruction execution is complete. */
ProbePointArg<DynInstPtr> *ppToCommit;
ProbePointArg<O3DynInstPtr> *ppToCommit;
public:
/** Constructs a DefaultIEW with the given parameters. */
@@ -171,24 +171,24 @@ class DefaultIEW
void squash(ThreadID tid);
/** Wakes all dependents of a completed instruction. */
void wakeDependents(const DynInstPtr &inst);
void wakeDependents(const O3DynInstPtr &inst);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
*/
void rescheduleMemInst(const DynInstPtr &inst);
void rescheduleMemInst(const O3DynInstPtr &inst);
/** Re-executes all rescheduled memory instructions. */
void replayMemInst(const DynInstPtr &inst);
void replayMemInst(const O3DynInstPtr &inst);
/** Moves memory instruction onto the list of cache blocked instructions */
void blockMemInst(const DynInstPtr &inst);
void blockMemInst(const O3DynInstPtr &inst);
/** Notifies that the cache has become unblocked */
void cacheUnblocked();
/** Sends an instruction to commit through the time buffer. */
void instToCommit(const DynInstPtr &inst);
void instToCommit(const O3DynInstPtr &inst);
/** Inserts unused instructions of a thread into the skid buffer. */
void skidInsert(ThreadID tid);
@@ -226,7 +226,7 @@ class DefaultIEW
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
/** Check misprediction */
void checkMisprediction(const DynInstPtr &inst);
void checkMisprediction(const O3DynInstPtr &inst);
// hardware transactional memory
// For debugging purposes, it is useful to keep track of the most recent
@@ -242,12 +242,12 @@ class DefaultIEW
/** Sends commit proper information for a squash due to a branch
* mispredict.
*/
void squashDueToBranch(const DynInstPtr &inst, ThreadID tid);
void squashDueToBranch(const O3DynInstPtr &inst, ThreadID tid);
/** Sends commit proper information for a squash due to a memory order
* violation.
*/
void squashDueToMemOrder(const DynInstPtr &inst, ThreadID tid);
void squashDueToMemOrder(const O3DynInstPtr &inst, ThreadID tid);
/** Sets Dispatch to blocked, and signals back to other stages to block. */
void block(ThreadID tid);
@@ -301,7 +301,7 @@ class DefaultIEW
private:
/** Updates execution stats based on the instruction. */
void updateExeInstStats(const DynInstPtr &inst);
void updateExeInstStats(const O3DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -337,10 +337,10 @@ class DefaultIEW
typename TimeBuffer<IEWStruct>::wire toCommit;
/** Queue of all instructions coming from rename this cycle. */
std::queue<DynInstPtr> insts[O3MaxThreads];
std::queue<O3DynInstPtr> insts[O3MaxThreads];
/** Skid buffer between rename and IEW. */
std::queue<DynInstPtr> skidBuffer[O3MaxThreads];
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
/** Scoreboard pointer. */
Scoreboard* scoreboard;

View File

@@ -50,6 +50,7 @@
#include "config/the_isa.hh"
#include "cpu/checker/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/fu_pool.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
@@ -122,20 +123,22 @@ template <class Impl>
void
DefaultIEW<Impl>::regProbePoints()
{
ppDispatch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Dispatch");
ppMispredict = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Mispredict");
ppDispatch = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Dispatch");
ppMispredict = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Mispredict");
/**
* Probe point with dynamic instruction as the argument used to probe when
* an instruction starts to execute.
*/
ppExecute = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(),
"Execute");
ppExecute = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Execute");
/**
* Probe point with dynamic instruction as the argument used to probe when
* an instruction execution completes and it is marked ready to commit.
*/
ppToCommit = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(),
"ToCommit");
ppToCommit = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "ToCommit");
}
template <class Impl>
@@ -461,7 +464,7 @@ DefaultIEW<Impl>::squash(ThreadID tid)
template<class Impl>
void
DefaultIEW<Impl>::squashDueToBranch(const DynInstPtr& inst, ThreadID tid)
DefaultIEW<Impl>::squashDueToBranch(const O3DynInstPtr& inst, ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] [sn:%llu] Squashing from a specific instruction,"
" PC: %s "
@@ -487,7 +490,7 @@ DefaultIEW<Impl>::squashDueToBranch(const DynInstPtr& inst, ThreadID tid)
template<class Impl>
void
DefaultIEW<Impl>::squashDueToMemOrder(const DynInstPtr& inst, ThreadID tid)
DefaultIEW<Impl>::squashDueToMemOrder(const O3DynInstPtr& inst, ThreadID tid)
{
DPRINTF(IEW, "[tid:%i] Memory violation, squashing violator and younger "
"insts, PC: %s [sn:%llu].\n", tid, inst->pcState(), inst->seqNum);
@@ -550,28 +553,28 @@ DefaultIEW<Impl>::unblock(ThreadID tid)
template<class Impl>
void
DefaultIEW<Impl>::wakeDependents(const DynInstPtr& inst)
DefaultIEW<Impl>::wakeDependents(const O3DynInstPtr& inst)
{
instQueue.wakeDependents(inst);
}
template<class Impl>
void
DefaultIEW<Impl>::rescheduleMemInst(const DynInstPtr& inst)
DefaultIEW<Impl>::rescheduleMemInst(const O3DynInstPtr& inst)
{
instQueue.rescheduleMemInst(inst);
}
template<class Impl>
void
DefaultIEW<Impl>::replayMemInst(const DynInstPtr& inst)
DefaultIEW<Impl>::replayMemInst(const O3DynInstPtr& inst)
{
instQueue.replayMemInst(inst);
}
template<class Impl>
void
DefaultIEW<Impl>::blockMemInst(const DynInstPtr& inst)
DefaultIEW<Impl>::blockMemInst(const O3DynInstPtr& inst)
{
instQueue.blockMemInst(inst);
}
@@ -585,7 +588,7 @@ DefaultIEW<Impl>::cacheUnblocked()
template<class Impl>
void
DefaultIEW<Impl>::instToCommit(const DynInstPtr& inst)
DefaultIEW<Impl>::instToCommit(const O3DynInstPtr& inst)
{
// This function should not be called after writebackInsts in a
// single cycle. That will cause problems with an instruction
@@ -630,7 +633,7 @@ template<class Impl>
void
DefaultIEW<Impl>::skidInsert(ThreadID tid)
{
DynInstPtr inst = NULL;
O3DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -927,13 +930,13 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
{
// Obtain instructions from skid buffer if unblocking, or queue from rename
// otherwise.
std::queue<DynInstPtr> &insts_to_dispatch =
std::queue<O3DynInstPtr> &insts_to_dispatch =
dispatchStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
int insts_to_add = insts_to_dispatch.size();
DynInstPtr inst;
O3DynInstPtr inst;
bool add_to_iq = false;
int dis_num_inst = 0;
@@ -1208,7 +1211,7 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
DynInstPtr inst = instQueue.getInstToExecute();
O3DynInstPtr inst = instQueue.getInstToExecute();
DPRINTF(IEW, "Execute: Processing PC %s, [tid:%i] [sn:%llu].\n",
inst->pcState(), inst->threadNumber,inst->seqNum);
@@ -1372,7 +1375,7 @@ DefaultIEW<Impl>::executeInsts()
// If there was an ordering violation, then get the
// DynInst that caused the violation. Note that this
// clears the violation signal.
DynInstPtr violator;
O3DynInstPtr violator;
violator = ldstQueue.getMemDepViolator(tid);
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
@@ -1396,7 +1399,7 @@ DefaultIEW<Impl>::executeInsts()
if (ldstQueue.violation(tid)) {
assert(inst->isMemRef());
DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
O3DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
@@ -1439,7 +1442,7 @@ DefaultIEW<Impl>::writebackInsts()
// as part of backwards communication.
for (int inst_num = 0; inst_num < wbWidth &&
toCommit->insts[inst_num]; inst_num++) {
DynInstPtr inst = toCommit->insts[inst_num];
O3DynInstPtr inst = toCommit->insts[inst_num];
ThreadID tid = inst->threadNumber;
DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %s.\n",
@@ -1610,7 +1613,7 @@ DefaultIEW<Impl>::tick()
template <class Impl>
void
DefaultIEW<Impl>::updateExeInstStats(const DynInstPtr& inst)
DefaultIEW<Impl>::updateExeInstStats(const O3DynInstPtr& inst)
{
ThreadID tid = inst->threadNumber;
@@ -1642,7 +1645,7 @@ DefaultIEW<Impl>::updateExeInstStats(const DynInstPtr& inst)
template <class Impl>
void
DefaultIEW<Impl>::checkMisprediction(const DynInstPtr& inst)
DefaultIEW<Impl>::checkMisprediction(const O3DynInstPtr& inst)
{
ThreadID tid = inst->threadNumber;

View File

@@ -32,8 +32,6 @@
#include "cpu/o3/comm.hh"
// Forward declarations.
class BaseO3DynInst;
template <class Impl>
class FullO3CPU;
@@ -66,15 +64,6 @@ struct O3CPUImpl
typedef TimeBufStruct<O3CPUImpl> TimeStruct;
/** The DynInst type to be used. */
typedef BaseO3DynInst DynInst;
/** The refcounted DynInst pointer to be used. In most cases this is
* what should be used, and not DynInst *.
*/
typedef RefCountingPtr<DynInst> DynInstPtr;
typedef RefCountingPtr<const DynInst> DynInstConstPtr;
/** The O3CPU type to be used. */
typedef FullO3CPU<O3CPUImpl> O3CPU;

View File

@@ -51,6 +51,7 @@
#include "base/types.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dep_graph.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/mem_dep_unit.hh"
#include "cpu/o3/store_set.hh"
@@ -89,19 +90,18 @@ class InstructionQueue
public:
//Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::IssueStruct IssueStruct;
typedef typename Impl::TimeStruct TimeStruct;
// Typedef of iterator through the list of instructions.
typedef typename std::list<DynInstPtr>::iterator ListIt;
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
/** FU completion event class. */
class FUCompletion : public Event
{
private:
/** Executing instruction. */
DynInstPtr inst;
O3DynInstPtr inst;
/** Index of the FU used for executing. */
int fuIdx;
@@ -116,7 +116,7 @@ class InstructionQueue
public:
/** Construct a FU completion event. */
FUCompletion(const DynInstPtr &_inst, int fu_idx,
FUCompletion(const O3DynInstPtr &_inst, int fu_idx,
InstructionQueue<Impl> *iq_ptr);
virtual void process();
@@ -177,40 +177,43 @@ class InstructionQueue
bool hasReadyInsts();
/** Inserts a new instruction into the IQ. */
void insert(const DynInstPtr &new_inst);
void insert(const O3DynInstPtr &new_inst);
/** Inserts a new, non-speculative instruction into the IQ. */
void insertNonSpec(const DynInstPtr &new_inst);
void insertNonSpec(const O3DynInstPtr &new_inst);
/** Inserts a memory or write barrier into the IQ to make sure
* loads and stores are ordered properly.
*/
void insertBarrier(const DynInstPtr &barr_inst);
void insertBarrier(const O3DynInstPtr &barr_inst);
/** Returns the oldest scheduled instruction, and removes it from
* the list of instructions waiting to execute.
*/
DynInstPtr getInstToExecute();
O3DynInstPtr getInstToExecute();
/** Gets a memory instruction that was referred due to a delayed DTB
* translation if it is now ready to execute. NULL if none available.
*/
DynInstPtr getDeferredMemInstToExecute();
O3DynInstPtr getDeferredMemInstToExecute();
/** Gets a memory instruction that was blocked on the cache. NULL if none
* available.
*/
DynInstPtr getBlockedMemInstToExecute();
O3DynInstPtr getBlockedMemInstToExecute();
/**
* Records the instruction as the producer of a register without
* adding it to the rest of the IQ.
*/
void recordProducer(const DynInstPtr &inst)
{ addToProducers(inst); }
void
recordProducer(const O3DynInstPtr &inst)
{
addToProducers(inst);
}
/** Process FU completion event. */
void processFUCompletion(const DynInstPtr &inst, int fu_idx);
void processFUCompletion(const O3DynInstPtr &inst, int fu_idx);
/**
* Schedules ready instructions, adding the ready ones (oldest first) to
@@ -228,34 +231,35 @@ class InstructionQueue
void commit(const InstSeqNum &inst, ThreadID tid = 0);
/** Wakes all dependents of a completed instruction. */
int wakeDependents(const DynInstPtr &completed_inst);
int wakeDependents(const O3DynInstPtr &completed_inst);
/** Adds a ready memory instruction to the ready list. */
void addReadyMemInst(const DynInstPtr &ready_inst);
void addReadyMemInst(const O3DynInstPtr &ready_inst);
/**
* Reschedules a memory instruction. It will be ready to issue once
* replayMemInst() is called.
*/
void rescheduleMemInst(const DynInstPtr &resched_inst);
void rescheduleMemInst(const O3DynInstPtr &resched_inst);
/** Replays a memory instruction. It must be rescheduled first. */
void replayMemInst(const DynInstPtr &replay_inst);
void replayMemInst(const O3DynInstPtr &replay_inst);
/**
* Defers a memory instruction when its DTB translation incurs a hw
* page table walk.
*/
void deferMemInst(const DynInstPtr &deferred_inst);
void deferMemInst(const O3DynInstPtr &deferred_inst);
/** Defers a memory instruction when it is cache blocked. */
void blockMemInst(const DynInstPtr &blocked_inst);
void blockMemInst(const O3DynInstPtr &blocked_inst);
/** Notify instruction queue that a previous blockage has resolved */
void cacheUnblocked();
/** Indicates an ordering violation between a store and a load. */
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load);
void violation(const O3DynInstPtr &store,
const O3DynInstPtr &faulting_load);
/**
* Squashes instructions for a thread. Squashing information is obtained
@@ -310,23 +314,23 @@ class InstructionQueue
//////////////////////////////////////
/** List of all the instructions in the IQ (some of which may be issued). */
std::list<DynInstPtr> instList[O3MaxThreads];
std::list<O3DynInstPtr> instList[O3MaxThreads];
/** List of instructions that are ready to be executed. */
std::list<DynInstPtr> instsToExecute;
std::list<O3DynInstPtr> instsToExecute;
/** List of instructions waiting for their DTB translation to
* complete (hw page table walk in progress).
*/
std::list<DynInstPtr> deferredMemInsts;
std::list<O3DynInstPtr> deferredMemInsts;
/** List of instructions that have been cache blocked. */
std::list<DynInstPtr> blockedMemInsts;
std::list<O3DynInstPtr> blockedMemInsts;
/** List of instructions that were cache blocked, but a retry has been seen
* since, so they can now be retried. May fail again go on the blocked list.
*/
std::list<DynInstPtr> retryMemInsts;
std::list<O3DynInstPtr> retryMemInsts;
/**
* Struct for comparing entries to be added to the priority queue.
@@ -335,16 +339,14 @@ class InstructionQueue
* numbers (and hence are older) will be at the top of the
* priority queue.
*/
struct pqCompare
struct PqCompare
{
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
{
return lhs->seqNum > rhs->seqNum;
}
bool operator()(const O3DynInstPtr &lhs,
const O3DynInstPtr &rhs) const;
};
typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
ReadyInstQueue;
typedef std::priority_queue<
O3DynInstPtr, std::vector<O3DynInstPtr>, PqCompare> ReadyInstQueue;
/** List of ready instructions, per op class. They are separated by op
* class to allow for easy mapping to FUs.
@@ -358,9 +360,9 @@ class InstructionQueue
* the sequence number will be available. Thus it is most efficient to be
* able to search by the sequence number alone.
*/
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
std::map<InstSeqNum, O3DynInstPtr> nonSpecInsts;
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
typedef typename std::map<InstSeqNum, O3DynInstPtr>::iterator NonSpecMapIt;
/** Entry for the list age ordering by op class. */
struct ListOrderEntry
@@ -397,7 +399,7 @@ class InstructionQueue
*/
void moveToYoungerInst(ListOrderIt age_order_it);
DependencyGraph<DynInstPtr> dependGraph;
DependencyGraph<O3DynInstPtr> dependGraph;
//////////////////////////////////////
// Various parameters
@@ -450,13 +452,13 @@ class InstructionQueue
std::vector<bool> regScoreboard;
/** Adds an instruction to the dependency graph, as a consumer. */
bool addToDependents(const DynInstPtr &new_inst);
bool addToDependents(const O3DynInstPtr &new_inst);
/** Adds an instruction to the dependency graph, as a producer. */
void addToProducers(const DynInstPtr &new_inst);
void addToProducers(const O3DynInstPtr &new_inst);
/** Moves an instruction to the ready queue if it is ready. */
void addIfReady(const DynInstPtr &inst);
void addIfReady(const O3DynInstPtr &inst);
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function

View File

@@ -46,6 +46,7 @@
#include <vector>
#include "base/logging.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/fu_pool.hh"
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/limits.hh"
@@ -59,7 +60,7 @@
using std::list;
template <class Impl>
InstructionQueue<Impl>::FUCompletion::FUCompletion(const DynInstPtr &_inst,
InstructionQueue<Impl>::FUCompletion::FUCompletion(const O3DynInstPtr &_inst,
int fu_idx, InstructionQueue<Impl> *iq_ptr)
: Event(Stat_Event_Pri, AutoDelete),
inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
@@ -576,7 +577,7 @@ InstructionQueue<Impl>::hasReadyInsts()
template <class Impl>
void
InstructionQueue<Impl>::insert(const DynInstPtr &new_inst)
InstructionQueue<Impl>::insert(const O3DynInstPtr &new_inst)
{
if (new_inst->isFloating()) {
iqIOStats.fpInstQueueWrites++;
@@ -622,7 +623,7 @@ InstructionQueue<Impl>::insert(const DynInstPtr &new_inst)
template <class Impl>
void
InstructionQueue<Impl>::insertNonSpec(const DynInstPtr &new_inst)
InstructionQueue<Impl>::insertNonSpec(const O3DynInstPtr &new_inst)
{
// @todo: Clean up this code; can do it by setting inst as unable
// to issue, then calling normal insert on the inst.
@@ -669,7 +670,7 @@ InstructionQueue<Impl>::insertNonSpec(const DynInstPtr &new_inst)
template <class Impl>
void
InstructionQueue<Impl>::insertBarrier(const DynInstPtr &barr_inst)
InstructionQueue<Impl>::insertBarrier(const O3DynInstPtr &barr_inst)
{
memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
@@ -677,11 +678,11 @@ InstructionQueue<Impl>::insertBarrier(const DynInstPtr &barr_inst)
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
InstructionQueue<Impl>::getInstToExecute()
{
assert(!instsToExecute.empty());
DynInstPtr inst = std::move(instsToExecute.front());
O3DynInstPtr inst = std::move(instsToExecute.front());
instsToExecute.pop_front();
if (inst->isFloating()) {
iqIOStats.fpInstQueueReads++;
@@ -748,7 +749,8 @@ InstructionQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
template <class Impl>
void
InstructionQueue<Impl>::processFUCompletion(const DynInstPtr &inst, int fu_idx)
InstructionQueue<Impl>::processFUCompletion(
const O3DynInstPtr &inst, int fu_idx)
{
DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
assert(!cpu->switchedOut());
@@ -779,7 +781,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
DynInstPtr mem_inst;
O3DynInstPtr mem_inst;
while ((mem_inst = std::move(getDeferredMemInstToExecute()))) {
addReadyMemInst(mem_inst);
}
@@ -806,7 +808,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
assert(!readyInsts[op_class].empty());
DynInstPtr issuing_inst = readyInsts[op_class].top();
O3DynInstPtr issuing_inst = readyInsts[op_class].top();
if (issuing_inst->isFloating()) {
iqIOStats.fpInstQueueReads++;
@@ -986,7 +988,7 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, ThreadID tid)
template <class Impl>
int
InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
InstructionQueue<Impl>::wakeDependents(const O3DynInstPtr &completed_inst)
{
int dependents = 0;
@@ -1054,7 +1056,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
//Go through the dependency chain, marking the registers as
//ready within the waiting instructions.
DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
while (dep_inst) {
DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
@@ -1086,7 +1088,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
template <class Impl>
void
InstructionQueue<Impl>::addReadyMemInst(const DynInstPtr &ready_inst)
InstructionQueue<Impl>::addReadyMemInst(const O3DynInstPtr &ready_inst)
{
OpClass op_class = ready_inst->opClass();
@@ -1109,7 +1111,7 @@ InstructionQueue<Impl>::addReadyMemInst(const DynInstPtr &ready_inst)
template <class Impl>
void
InstructionQueue<Impl>::rescheduleMemInst(const DynInstPtr &resched_inst)
InstructionQueue<Impl>::rescheduleMemInst(const O3DynInstPtr &resched_inst)
{
DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
@@ -1123,21 +1125,21 @@ InstructionQueue<Impl>::rescheduleMemInst(const DynInstPtr &resched_inst)
template <class Impl>
void
InstructionQueue<Impl>::replayMemInst(const DynInstPtr &replay_inst)
InstructionQueue<Impl>::replayMemInst(const O3DynInstPtr &replay_inst)
{
memDepUnit[replay_inst->threadNumber].replay();
}
template <class Impl>
void
InstructionQueue<Impl>::deferMemInst(const DynInstPtr &deferred_inst)
InstructionQueue<Impl>::deferMemInst(const O3DynInstPtr &deferred_inst)
{
deferredMemInsts.push_back(deferred_inst);
}
template <class Impl>
void
InstructionQueue<Impl>::blockMemInst(const DynInstPtr &blocked_inst)
InstructionQueue<Impl>::blockMemInst(const O3DynInstPtr &blocked_inst)
{
blocked_inst->clearIssued();
blocked_inst->clearCanIssue();
@@ -1154,13 +1156,13 @@ InstructionQueue<Impl>::cacheUnblocked()
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
InstructionQueue<Impl>::getDeferredMemInstToExecute()
{
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
DynInstPtr mem_inst = std::move(*it);
O3DynInstPtr mem_inst = std::move(*it);
deferredMemInsts.erase(it);
return mem_inst;
}
@@ -1169,13 +1171,13 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
InstructionQueue<Impl>::getBlockedMemInstToExecute()
{
if (retryMemInsts.empty()) {
return nullptr;
} else {
DynInstPtr mem_inst = std::move(retryMemInsts.front());
O3DynInstPtr mem_inst = std::move(retryMemInsts.front());
retryMemInsts.pop_front();
return mem_inst;
}
@@ -1183,8 +1185,8 @@ InstructionQueue<Impl>::getBlockedMemInstToExecute()
template <class Impl>
void
InstructionQueue<Impl>::violation(const DynInstPtr &store,
const DynInstPtr &faulting_load)
InstructionQueue<Impl>::violation(const O3DynInstPtr &store,
const O3DynInstPtr &faulting_load)
{
iqIOStats.intInstQueueWrites++;
memDepUnit[store->threadNumber].violation(store, faulting_load);
@@ -1223,7 +1225,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
while (squash_it != instList[tid].end() &&
(*squash_it)->seqNum > squashedSeqNum[tid]) {
DynInstPtr squashed_inst = (*squash_it);
O3DynInstPtr squashed_inst = (*squash_it);
if (squashed_inst->isFloating()) {
iqIOStats.fpInstQueueWrites++;
} else if (squashed_inst->isVector()) {
@@ -1329,7 +1331,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
// IQ clears out the heads of the dependency graph only when
// instructions reach writeback stage. If an instruction is squashed
// before writeback stage, its head of dependency graph would not be
// cleared out; it holds the instruction's DynInstPtr. This prevents
// cleared out; it holds the instruction's O3DynInstPtr. This prevents
// freeing the squashed instruction's DynInst.
// Thus, we need to manually clear out the squashed instructions' heads
// of dependency graph.
@@ -1352,7 +1354,15 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
template <class Impl>
bool
InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
InstructionQueue<Impl>::PqCompare::operator()(
const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const
{
return lhs->seqNum > rhs->seqNum;
}
template <class Impl>
bool
InstructionQueue<Impl>::addToDependents(const O3DynInstPtr &new_inst)
{
// Loop through the instruction's source registers, adding
// them to the dependency list if they are not ready.
@@ -1400,7 +1410,7 @@ InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
template <class Impl>
void
InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
InstructionQueue<Impl>::addToProducers(const O3DynInstPtr &new_inst)
{
// Nothing really needs to be marked when an instruction becomes
// the producer of a register's value, but for convenience a ptr
@@ -1436,7 +1446,7 @@ InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
template <class Impl>
void
InstructionQueue<Impl>::addIfReady(const DynInstPtr &inst)
InstructionQueue<Impl>::addIfReady(const O3DynInstPtr &inst)
{
// If the instruction now has all of its source registers
// available, then add it to the list of ready instructions.

View File

@@ -53,6 +53,8 @@
#include "base/flags.hh"
#include "base/types.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/impl.hh"
#include "cpu/utils.hh"
#include "enums/SMTQueuePolicy.hh"
#include "mem/port.hh"
@@ -74,7 +76,6 @@ class LSQ
{
public:
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
class LSQRequest;
/** Derived class to hold any sender state the LSQ needs. */
@@ -93,7 +94,7 @@ class LSQ
public:
/** Instruction which initiated the access to memory. */
DynInstPtr inst;
O3DynInstPtr inst;
/** The main packet from a split load, used during writeback. */
PacketPtr mainPkt;
/** A second packet from a split store that needs sending. */
@@ -113,7 +114,7 @@ class LSQ
* case the SenderState knows.
*/
bool deleted;
ContextID contextId() { return inst->contextId(); }
ContextID contextId();
/** Completes a packet and returns whether the access is finished. */
inline bool isComplete() { return outstanding == 0; }
@@ -293,7 +294,7 @@ class LSQ
public:
LSQUnit<Impl>& _port;
const DynInstPtr _inst;
const O3DynInstPtr _inst;
uint32_t _taskId;
PacketDataPtr _data;
std::vector<PacketPtr> _packets;
@@ -308,38 +309,11 @@ class LSQ
AtomicOpFunctorPtr _amo_op;
protected:
LSQUnit<Impl>* lsqUnit() { return &_port; }
LSQRequest(LSQUnit<Impl> *port, const DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
_numOutstandingPackets(0), _amo_op(nullptr)
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore,
_inst->isStoreConditional() || _inst->isAtomic());
flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
LSQRequest(LSQUnit<Impl>* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
PacketDataPtr data = nullptr, uint64_t* res = nullptr,
AtomicOpFunctorPtr amo_op = nullptr)
: _state(State::NotIssued), _senderState(nullptr),
numTranslatedFragments(0),
numInTranslationFragments(0),
_port(*port), _inst(inst), _data(data),
_res(res), _addr(addr), _size(size),
_flags(flags_),
_numOutstandingPackets(0),
_amo_op(std::move(amo_op))
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore,
_inst->isStoreConditional() || _inst->isAtomic());
flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad);
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr);
bool
isLoad() const
@@ -354,21 +328,9 @@ class LSQ
}
/** Install the request in the LQ/SQ. */
void install()
{
if (isLoad()) {
_port.loadQueue[_inst->lqIdx].setRequest(this);
} else {
// Store, StoreConditional, and Atomic requests are pushed
// to this storeQueue
_port.storeQueue[_inst->sqIdx].setRequest(this);
}
}
virtual bool
squashed() const override
{
return _inst->isSquashed();
}
void install();
bool squashed() const override;
/**
* Test if the LSQRequest has been released, i.e. self-owned.
@@ -391,7 +353,8 @@ class LSQ
* but there is any in-flight translation request to the TLB or access
* request to the memory.
*/
void release(Flag reason)
void
release(Flag reason)
{
assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
if (!isAnyOutstandingRequest()) {
@@ -410,35 +373,14 @@ class LSQ
* The request is only added if the mask is empty or if there is at
* least an active element in it.
*/
void
addRequest(Addr addr, unsigned size,
const std::vector<bool>& byte_enable)
{
if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
auto request = std::make_shared<Request>(
addr, size, _flags, _inst->requestorId(),
_inst->instAddr(), _inst->contextId(),
std::move(_amo_op));
request->setByteEnable(byte_enable);
_requests.push_back(request);
}
}
void addRequest(Addr addr, unsigned size,
const std::vector<bool>& byte_enable);
/** Destructor.
* The LSQRequest owns the request. If the packet has already been
* sent, the sender state will be deleted upon receiving the reply.
*/
virtual ~LSQRequest()
{
assert(!isAnyOutstandingRequest());
_inst->savedReq = nullptr;
if (_senderState)
delete _senderState;
for (auto r: _packets)
delete r;
};
virtual ~LSQRequest();
public:
/** Convenience getters/setters. */
@@ -450,7 +392,7 @@ class LSQ
request()->setContext(context_id);
}
const DynInstPtr&
const O3DynInstPtr&
instruction()
{
return _inst;
@@ -728,7 +670,7 @@ class LSQ
using LSQRequest::_numOutstandingPackets;
using LSQRequest::_amo_op;
public:
SingleDataRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
SingleDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr) :
@@ -766,7 +708,7 @@ class LSQ
using LSQRequest::flags;
using LSQRequest::setState;
public:
HtmCmdRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
HtmCmdRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
const Request::Flags& flags_);
inline virtual ~HtmCmdRequest() {}
virtual void initiateTranslation();
@@ -813,7 +755,7 @@ class LSQ
PacketPtr _mainPacket;
public:
SplitDataRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
SplitDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags & flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr) :
@@ -876,15 +818,15 @@ class LSQ
void tick();
/** Inserts a load into the LSQ. */
void insertLoad(const DynInstPtr &load_inst);
void insertLoad(const O3DynInstPtr &load_inst);
/** Inserts a store into the LSQ. */
void insertStore(const DynInstPtr &store_inst);
void insertStore(const O3DynInstPtr &store_inst);
/** Executes a load. */
Fault executeLoad(const DynInstPtr &inst);
Fault executeLoad(const O3DynInstPtr &inst);
/** Executes a store. */
Fault executeStore(const DynInstPtr &inst);
Fault executeStore(const O3DynInstPtr &inst);
/**
* Commits loads up until the given sequence number for a specific thread.
@@ -924,7 +866,7 @@ class LSQ
bool violation(ThreadID tid) { return thread.at(tid).violation(); }
/** Gets the instruction that caused the memory ordering violation. */
DynInstPtr
O3DynInstPtr
getMemDepViolator(ThreadID tid)
{
return thread.at(tid).getMemDepViolator();
@@ -1103,7 +1045,7 @@ class LSQ
void recvTimingSnoopReq(PacketPtr pkt);
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op,
const std::vector<bool>& byte_enable);

View File

@@ -49,6 +49,7 @@
#include "base/compiler.hh"
#include "base/logging.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/lsq.hh"
@@ -59,6 +60,13 @@
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
template <class Impl>
ContextID
LSQ<Impl>::LSQSenderState::contextId()
{
return inst->contextId();
}
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, DefaultIEW<Impl> *iew_ptr,
const DerivO3CPUParams &params)
@@ -220,7 +228,7 @@ LSQ<Impl>::cachePortBusy(bool is_load)
template<class Impl>
void
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
LSQ<Impl>::insertLoad(const O3DynInstPtr &load_inst)
{
ThreadID tid = load_inst->threadNumber;
@@ -229,7 +237,7 @@ LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
template<class Impl>
void
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
LSQ<Impl>::insertStore(const O3DynInstPtr &store_inst)
{
ThreadID tid = store_inst->threadNumber;
@@ -238,7 +246,7 @@ LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
template<class Impl>
Fault
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
LSQ<Impl>::executeLoad(const O3DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -247,7 +255,7 @@ LSQ<Impl>::executeLoad(const DynInstPtr &inst)
template<class Impl>
Fault
LSQ<Impl>::executeStore(const DynInstPtr &inst)
LSQ<Impl>::executeStore(const O3DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -676,7 +684,7 @@ LSQ<Impl>::dumpInsts() const
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
LSQ<Impl>::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op,
const std::vector<bool>& byte_enable)
@@ -951,6 +959,85 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
}
}
template<class Impl>
LSQ<Impl>::LSQRequest::LSQRequest(
LSQUnit<Impl> *port, const O3DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
_numOutstandingPackets(0), _amo_op(nullptr)
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore,
_inst->isStoreConditional() || _inst->isAtomic());
flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
template<class Impl>
LSQ<Impl>::LSQRequest::LSQRequest(
LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op)
: _state(State::NotIssued), _senderState(nullptr),
numTranslatedFragments(0),
numInTranslationFragments(0),
_port(*port), _inst(inst), _data(data),
_res(res), _addr(addr), _size(size),
_flags(flags_),
_numOutstandingPackets(0),
_amo_op(std::move(amo_op))
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore,
_inst->isStoreConditional() || _inst->isAtomic());
flags.set(Flag::IsAtomic, _inst->isAtomic());
install();
}
template<class Impl>
void
LSQ<Impl>::LSQRequest::install()
{
if (isLoad()) {
_port.loadQueue[_inst->lqIdx].setRequest(this);
} else {
// Store, StoreConditional, and Atomic requests are pushed
// to this storeQueue
_port.storeQueue[_inst->sqIdx].setRequest(this);
}
}
template<class Impl>
bool LSQ<Impl>::LSQRequest::squashed() const { return _inst->isSquashed(); }
template<class Impl>
void
LSQ<Impl>::LSQRequest::addRequest(Addr addr, unsigned size,
const std::vector<bool>& byte_enable)
{
if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
auto request = std::make_shared<Request>(
addr, size, _flags, _inst->requestorId(),
_inst->instAddr(), _inst->contextId(),
std::move(_amo_op));
request->setByteEnable(byte_enable);
_requests.push_back(request);
}
}
template<class Impl>
LSQ<Impl>::LSQRequest::~LSQRequest()
{
assert(!isAnyOutstandingRequest());
_inst->savedReq = nullptr;
if (_senderState)
delete _senderState;
for (auto r: _packets)
delete r;
};
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
@@ -1226,7 +1313,7 @@ LSQ<Impl>::DcachePort::recvReqRetry()
template<class Impl>
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit<Impl>* port,
const DynInstPtr& inst,
const O3DynInstPtr& inst,
const Request::Flags& flags_) :
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
nullptr, nullptr, nullptr)

View File

@@ -53,6 +53,7 @@
#include "arch/locked_mem.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/timebuf.hh"
#include "debug/HtmCpu.hh"
@@ -85,7 +86,6 @@ class LSQUnit
static constexpr auto MaxDataBytes = MaxVecRegLenInBytes;
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::IssueStruct IssueStruct;
using LSQSenderState = typename LSQ<Impl>::LSQSenderState;
@@ -95,23 +95,17 @@ class LSQUnit
{
private:
/** The instruction. */
DynInstPtr inst;
O3DynInstPtr inst;
/** The request. */
LSQRequest* req;
LSQRequest* req = nullptr;
/** The size of the operation. */
uint32_t _size;
uint32_t _size = 0;
/** Valid entry. */
bool _valid;
public:
/** Constructs an empty store queue entry. */
LSQEntry()
: inst(nullptr), req(nullptr), _size(0), _valid(false)
{
}
bool _valid = false;
public:
~LSQEntry()
{
inst = nullptr;
if (req != nullptr) {
req->freeLSQEntry();
req = nullptr;
@@ -131,13 +125,14 @@ class LSQUnit
}
void
set(const DynInstPtr& inst)
set(const O3DynInstPtr& inst)
{
assert(!_valid);
this->inst = inst;
_valid = true;
_size = 0;
}
LSQRequest* request() { return req; }
void setRequest(LSQRequest* r) { req = r; }
bool hasRequest() { return req != nullptr; }
@@ -146,7 +141,7 @@ class LSQUnit
bool valid() const { return _valid; }
uint32_t& size() { return _size; }
const uint32_t& size() const { return _size; }
const DynInstPtr& instruction() const { return inst; }
const O3DynInstPtr& instruction() const { return inst; }
/** @} */
};
@@ -156,32 +151,27 @@ class LSQUnit
/** The store data. */
char _data[MaxDataBytes];
/** Whether or not the store can writeback. */
bool _canWB;
bool _canWB = false;
/** Whether or not the store is committed. */
bool _committed;
bool _committed = false;
/** Whether or not the store is completed. */
bool _completed;
bool _completed = false;
/** Does this request write all zeros and thus doesn't
* have any data attached to it. Used for cache block zero
* style instructs (ARM DC ZVA; ALPHA WH64)
*/
bool _isAllZeros;
bool _isAllZeros = false;
public:
static constexpr size_t DataSize = sizeof(_data);
/** Constructs an empty store queue entry. */
SQEntry()
: _canWB(false), _committed(false), _completed(false),
_isAllZeros(false)
{
std::memset(_data, 0, DataSize);
}
~SQEntry()
{
}
void
set(const DynInstPtr& inst)
set(const O3DynInstPtr& inst)
{
LSQEntry::set(inst);
}
@@ -192,6 +182,7 @@ class LSQUnit
LSQEntry::clear();
_canWB = _completed = _committed = _isAllZeros = false;
}
/** Member accessors. */
/** @{ */
bool& canWB() { return _canWB; }
@@ -250,11 +241,11 @@ class LSQUnit
void takeOverFrom();
/** Inserts an instruction. */
void insert(const DynInstPtr &inst);
void insert(const O3DynInstPtr &inst);
/** Inserts a load instruction. */
void insertLoad(const DynInstPtr &load_inst);
void insertLoad(const O3DynInstPtr &load_inst);
/** Inserts a store instruction. */
void insertStore(const DynInstPtr &store_inst);
void insertStore(const O3DynInstPtr &store_inst);
/** Check for ordering violations in the LSQ. For a store squash if we
* ever find a conflicting load. For a load, only squash if we
@@ -263,7 +254,7 @@ class LSQUnit
* @param inst the instruction to check
*/
Fault checkViolations(typename LoadQueue::iterator& loadIt,
const DynInstPtr& inst);
const O3DynInstPtr& inst);
/** Check if an incoming invalidate hits in the lsq on a load
* that might have issued out of order wrt another load beacuse
@@ -272,11 +263,11 @@ class LSQUnit
void checkSnoop(PacketPtr pkt);
/** Executes a load instruction. */
Fault executeLoad(const DynInstPtr &inst);
Fault executeLoad(const O3DynInstPtr &inst);
Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
/** Executes a store instruction. */
Fault executeStore(const DynInstPtr &inst);
Fault executeStore(const O3DynInstPtr &inst);
/** Commits the head load. */
void commitLoad();
@@ -302,7 +293,7 @@ class LSQUnit
bool violation() { return memDepViolator; }
/** Returns the memory ordering violator. */
DynInstPtr getMemDepViolator();
O3DynInstPtr getMemDepViolator();
/** Returns the number of free LQ entries. */
unsigned numFreeLoadEntries();
@@ -378,7 +369,7 @@ class LSQUnit
void resetState();
/** Writes back the instruction, sending it to IEW. */
void writeback(const DynInstPtr &inst, PacketPtr pkt);
void writeback(const O3DynInstPtr &inst, PacketPtr pkt);
/** Try to finish a previously blocked write back attempt */
void writebackBlockedStore();
@@ -460,7 +451,7 @@ class LSQUnit
{
public:
/** Constructs a writeback event. */
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
WritebackEvent(const O3DynInstPtr &_inst, PacketPtr pkt,
LSQUnit *lsq_ptr);
/** Processes the writeback event. */
@@ -471,7 +462,7 @@ class LSQUnit
private:
/** Instruction whose results are being written back. */
DynInstPtr inst;
O3DynInstPtr inst;
/** The packet that would have been sent to memory. */
PacketPtr pkt;
@@ -552,7 +543,7 @@ class LSQUnit
bool storeInFlight;
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
O3DynInstPtr memDepViolator;
/** Whether or not there is a packet that couldn't be sent because of
* a lack of cache ports. */
@@ -634,357 +625,4 @@ class LSQUnit
typedef CircularQueue<SQEntry> SQueue;
};
template <class Impl>
Fault
LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
{
LQEntry& load_req = loadQueue[load_idx];
const DynInstPtr& load_inst = load_req.instruction();
load_req.setRequest(req);
assert(load_inst);
assert(!load_inst->isExecuted());
// Make sure this isn't a strictly ordered load
// A bit of a hackish way to get strictly ordered accesses to work
// only if they're at the head of the LSQ and are ready to commit
// (at the head of the ROB too).
if (req->mainRequest()->isStrictlyOrdered() &&
(load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
load_inst->clearIssued();
load_inst->effAddrValid(false);
++stats.rescheduledLoads;
DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
load_req.setRequest(nullptr);
req->discard();
return std::make_shared<GenericISA::M5PanicFault>(
"Strictly ordered load [sn:%llx] PC %s\n",
load_inst->seqNum, load_inst->pcState());
}
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
"storeHead: %i addr: %#x%s\n",
load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
if (req->mainRequest()->isLLSC()) {
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
load_inst->recordResult(false);
TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
load_inst->recordResult(true);
}
if (req->mainRequest()->isLocalAccess()) {
assert(!load_inst->memData);
assert(!load_inst->inHtmTransactionalState());
load_inst->memData = new uint8_t[MaxDataBytes];
ThreadContext *thread = cpu->tcBase(lsqID);
PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
main_pkt->dataStatic(load_inst->memData);
Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
cpu->schedule(wb, cpu->clockEdge(delay));
return NoFault;
}
// hardware transactional memory
if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
{
// don't want to send nested transactionStarts and
// transactionStops outside of core, e.g. to Ruby
if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
Cycles delay(0);
PacketPtr data_pkt =
new Packet(req->mainRequest(), MemCmd::ReadReq);
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
// sanity checks espect zero in request's data
memset(load_inst->memData, 0, req->mainRequest()->getSize());
}
data_pkt->dataStatic(load_inst->memData);
if (load_inst->inHtmTransactionalState()) {
data_pkt->setHtmTransactional(
load_inst->getHtmTransactionUid());
}
data_pkt->makeResponse();
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
cpu->schedule(wb, cpu->clockEdge(delay));
return NoFault;
}
}
// Check the SQ for any previous stores that might lead to forwarding
auto store_it = load_inst->sqIt;
assert (store_it >= storeWBIt);
// End once we've reached the top of the LSQ
while (store_it != storeWBIt) {
// Move the index to one younger
store_it--;
assert(store_it->valid());
assert(store_it->instruction()->seqNum < load_inst->seqNum);
int store_size = store_it->size();
// Cache maintenance instructions go down via the store
// path but they carry no data and they shouldn't be
// considered for forwarding
if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
!(store_it->request()->mainRequest() &&
store_it->request()->mainRequest()->isCacheMaintenance())) {
assert(store_it->instruction()->effAddrValid());
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
auto req_s = req->mainRequest()->getVaddr();
auto req_e = req_s + req->mainRequest()->getSize();
auto st_s = store_it->instruction()->effAddr;
auto st_e = st_s + store_size;
bool store_has_lower_limit = req_s >= st_s;
bool store_has_upper_limit = req_e <= st_e;
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;
auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
// If the store entry is not atomic (atomic does not have valid
// data), the store has all of the data needed, and
// the load is not LLSC, then
// we can forward data from the store to the load
if (!store_it->instruction()->isAtomic() &&
store_has_lower_limit && store_has_upper_limit &&
!req->mainRequest()->isLLSC()) {
const auto& store_req = store_it->request()->mainRequest();
coverage = store_req->isMasked() ?
AddrRangeCoverage::PartialAddrRangeCoverage :
AddrRangeCoverage::FullAddrRangeCoverage;
} else if (
// This is the partial store-load forwarding case where a store
// has only part of the load's data and the load isn't LLSC
(!req->mainRequest()->isLLSC() &&
((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part))) ||
// The load is LLSC, and the store has all or part of the
// load's data
(req->mainRequest()->isLLSC() &&
((store_has_lower_limit || upper_load_has_store_part) &&
(store_has_upper_limit || lower_load_has_store_part))) ||
// The store entry is atomic and has all or part of the load's
// data
(store_it->instruction()->isAtomic() &&
((store_has_lower_limit || upper_load_has_store_part) &&
(store_has_upper_limit || lower_load_has_store_part)))) {
coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
}
if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
// Get shift amount for offset into the store's data.
int shift_amt = req->mainRequest()->getVaddr() -
store_it->instruction()->effAddr;
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
}
if (store_it->isAllZeros())
memset(load_inst->memData, 0,
req->mainRequest()->getSize());
else
memcpy(load_inst->memData,
store_it->data() + shift_amt,
req->mainRequest()->getSize());
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
"addr %#x\n", store_it._idx,
req->mainRequest()->getVaddr());
PacketPtr data_pkt = new Packet(req->mainRequest(),
MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
// hardware transactional memory
// Store to load forwarding within a transaction
// This should be okay because the store will be sent to
// the memory subsystem and subsequently get added to the
// write set of the transaction. The write set has a stronger
// property than the read set, so the load doesn't necessarily
// have to be there.
assert(!req->mainRequest()->isHTMCmd());
if (load_inst->inHtmTransactionalState()) {
assert (!storeQueue[store_it._idx].completed());
assert (
storeQueue[store_it._idx].instruction()->
inHtmTransactionalState());
assert (
load_inst->getHtmTransactionUid() ==
storeQueue[store_it._idx].instruction()->
getHtmTransactionUid());
data_pkt->setHtmTransactional(
load_inst->getHtmTransactionUid());
DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
"pc=0x%lx - vaddr=0x%lx - "
"paddr=0x%lx - htmUid=%u\n",
load_inst->instAddr(),
data_pkt->req->hasVaddr() ?
data_pkt->req->getVaddr() : 0lu,
data_pkt->getAddr(),
load_inst->getHtmTransactionUid());
}
if (req->isAnyOutstandingRequest()) {
assert(req->_numOutstandingPackets > 0);
// There are memory requests packets in flight already.
// This may happen if the store was not complete the
// first time this load got executed. Signal the senderSate
// that response packets should be discarded.
req->discardSenderState();
}
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
this);
// We'll say this has a 1 cycle load-store forwarding latency
// for now.
// @todo: Need to make this a parameter.
cpu->schedule(wb, curTick());
// Don't need to do anything special for split loads.
++stats.forwLoads;
return NoFault;
} else if (coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
// If it's already been written back, then don't worry about
// stalling on it.
if (store_it->completed()) {
panic("Should not check one of these");
continue;
}
// Must stall load and force it to retry, so long as it's the
// oldest load that needs to do so.
if (!stalled ||
(stalled &&
load_inst->seqNum <
loadQueue[stallingLoadIdx].instruction()->seqNum)) {
stalled = true;
stallingStoreIsn = store_it->instruction()->seqNum;
stallingLoadIdx = load_idx;
}
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
load_inst->clearIssued();
load_inst->effAddrValid(false);
++stats.rescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
"Store idx %i to load addr %#x\n",
store_it._idx, req->mainRequest()->getVaddr());
// Must discard the request.
req->discard();
load_req.setRequest(nullptr);
return NoFault;
}
}
}
// If there's no forwarding case, then go access memory
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
}
// hardware transactional memory
if (req->mainRequest()->isHTMCmd()) {
// this is a simple sanity check
// the Ruby cache controller will set
// memData to 0x0ul if successful.
*load_inst->memData = (uint64_t) 0x1ull;
}
// For now, load throughput is constrained by the number of
// load FUs only, and loads do not consume a cache port (only
// stores do).
// @todo We should account for cache port contention
// and arbitrate between loads and stores.
// if we the cache is not blocked, do cache access
if (req->senderState() == nullptr) {
LQSenderState *state = new LQSenderState(
loadQueue.getIterator(load_idx));
state->isLoad = true;
state->inst = load_inst;
state->isSplit = req->isSplit();
req->senderState(state);
}
req->buildPackets();
req->sendPacketToCache();
if (!req->isSent())
iewStage->blockMemInst(load_inst);
return NoFault;
}
template <class Impl>
Fault
LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
{
assert(storeQueue[store_idx].valid());
DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
"[sn:%llu]\n",
store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
storeQueue[store_idx].instruction()->seqNum);
storeQueue[store_idx].setRequest(req);
unsigned size = req->_size;
storeQueue[store_idx].size() = size;
bool store_no_data =
req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
storeQueue[store_idx].isAllZeros() = store_no_data;
assert(size <= SQEntry::DataSize || store_no_data);
// copy data into the storeQueue only if the store request has valid data
if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
!req->request()->isCacheMaintenance() &&
!req->request()->isAtomic())
memcpy(storeQueue[store_idx].data(), data, size);
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;
}
#endif // __CPU_O3_LSQ_UNIT_HH__

View File

@@ -60,7 +60,7 @@
#include "mem/request.hh"
template<class Impl>
LSQUnit<Impl>::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
LSQUnit<Impl>::WritebackEvent::WritebackEvent(const O3DynInstPtr &_inst,
PacketPtr _pkt, LSQUnit *lsq_ptr)
: Event(Default_Pri, AutoDelete),
inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
@@ -112,7 +112,7 @@ void
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
{
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
DynInstPtr inst = state->inst;
O3DynInstPtr inst = state->inst;
// hardware transactional memory
// sanity check
@@ -317,7 +317,7 @@ LSQUnit<Impl>::takeOverFrom()
template <class Impl>
void
LSQUnit<Impl>::insert(const DynInstPtr &inst)
LSQUnit<Impl>::insert(const O3DynInstPtr &inst)
{
assert(inst->isMemRef());
@@ -334,7 +334,7 @@ LSQUnit<Impl>::insert(const DynInstPtr &inst)
template <class Impl>
void
LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
LSQUnit<Impl>::insertLoad(const O3DynInstPtr &load_inst)
{
assert(!loadQueue.full());
assert(loads < loadQueue.capacity());
@@ -397,7 +397,7 @@ LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
template <class Impl>
void
LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
LSQUnit<Impl>::insertStore(const O3DynInstPtr& store_inst)
{
// Make sure it is not full before inserting an instruction.
assert(!storeQueue.full());
@@ -418,10 +418,10 @@ LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
LSQUnit<Impl>::getMemDepViolator()
{
DynInstPtr temp = memDepViolator;
O3DynInstPtr temp = memDepViolator;
memDepViolator = NULL;
@@ -475,7 +475,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
DynInstPtr ld_inst = iter->instruction();
O3DynInstPtr ld_inst = iter->instruction();
assert(ld_inst);
LSQRequest *req = iter->request();
@@ -535,7 +535,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
template <class Impl>
Fault
LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
const DynInstPtr& inst)
const O3DynInstPtr& inst)
{
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
@@ -546,7 +546,7 @@ LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
* like the implementation that came before it, we're overly conservative.
*/
while (loadIt != loadQueue.end()) {
DynInstPtr ld_inst = loadIt->instruction();
O3DynInstPtr ld_inst = loadIt->instruction();
if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
++loadIt;
continue;
@@ -615,7 +615,7 @@ LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
template <class Impl>
Fault
LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
LSQUnit<Impl>::executeLoad(const O3DynInstPtr &inst)
{
// Execute a specific load.
Fault load_fault = NoFault;
@@ -682,7 +682,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
template <class Impl>
Fault
LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
LSQUnit<Impl>::executeStore(const O3DynInstPtr &store_inst)
{
// Make sure that a store exists.
assert(stores != 0);
@@ -837,7 +837,7 @@ LSQUnit<Impl>::writebackStores()
assert(storeWBIt->hasRequest());
assert(!storeWBIt->committed());
DynInstPtr inst = storeWBIt->instruction();
O3DynInstPtr inst = storeWBIt->instruction();
LSQRequest* req = storeWBIt->request();
// Process store conditionals or store release after all previous
@@ -1095,7 +1095,7 @@ LSQUnit<Impl>::storePostSend()
template <class Impl>
void
LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
LSQUnit<Impl>::writeback(const O3DynInstPtr &inst, PacketPtr pkt)
{
iewStage->wakeCPU();
@@ -1170,7 +1170,7 @@ LSQUnit<Impl>::completeStore(typename StoreQueue::iterator store_idx)
/* We 'need' a copy here because we may clear the entry from the
* store queue. */
DynInstPtr store_inst = store_idx->instruction();
O3DynInstPtr store_inst = store_idx->instruction();
if (store_idx == storeQueue.begin()) {
do {
storeQueue.front().clear();
@@ -1279,7 +1279,7 @@ LSQUnit<Impl>::dumpInsts() const
cprintf("Load queue: ");
for (const auto& e: loadQueue) {
const DynInstPtr &inst(e.instruction());
const O3DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
}
cprintf("\n");
@@ -1288,7 +1288,7 @@ LSQUnit<Impl>::dumpInsts() const
cprintf("Store queue: ");
for (const auto& e: storeQueue) {
const DynInstPtr &inst(e.instruction());
const O3DynInstPtr &inst(e.instruction());
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
}
@@ -1302,4 +1302,358 @@ LSQUnit<Impl>::cacheLineSize()
return cpu->cacheLineSize();
}
template <class Impl>
Fault
LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
{
LQEntry& load_req = loadQueue[load_idx];
const O3DynInstPtr& load_inst = load_req.instruction();
load_req.setRequest(req);
assert(load_inst);
assert(!load_inst->isExecuted());
// Make sure this isn't a strictly ordered load
// A bit of a hackish way to get strictly ordered accesses to work
// only if they're at the head of the LSQ and are ready to commit
// (at the head of the ROB too).
if (req->mainRequest()->isStrictlyOrdered() &&
(load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
load_inst->clearIssued();
load_inst->effAddrValid(false);
++stats.rescheduledLoads;
DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
load_req.setRequest(nullptr);
req->discard();
return std::make_shared<GenericISA::M5PanicFault>(
"Strictly ordered load [sn:%llx] PC %s\n",
load_inst->seqNum, load_inst->pcState());
}
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
"storeHead: %i addr: %#x%s\n",
load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
if (req->mainRequest()->isLLSC()) {
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
load_inst->recordResult(false);
TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
load_inst->recordResult(true);
}
if (req->mainRequest()->isLocalAccess()) {
assert(!load_inst->memData);
assert(!load_inst->inHtmTransactionalState());
load_inst->memData = new uint8_t[MaxDataBytes];
ThreadContext *thread = cpu->tcBase(lsqID);
PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
main_pkt->dataStatic(load_inst->memData);
Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
cpu->schedule(wb, cpu->clockEdge(delay));
return NoFault;
}
// hardware transactional memory
if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
{
// don't want to send nested transactionStarts and
// transactionStops outside of core, e.g. to Ruby
if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
Cycles delay(0);
PacketPtr data_pkt =
new Packet(req->mainRequest(), MemCmd::ReadReq);
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
// sanity checks espect zero in request's data
memset(load_inst->memData, 0, req->mainRequest()->getSize());
}
data_pkt->dataStatic(load_inst->memData);
if (load_inst->inHtmTransactionalState()) {
data_pkt->setHtmTransactional(
load_inst->getHtmTransactionUid());
}
data_pkt->makeResponse();
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
cpu->schedule(wb, cpu->clockEdge(delay));
return NoFault;
}
}
// Check the SQ for any previous stores that might lead to forwarding
auto store_it = load_inst->sqIt;
assert (store_it >= storeWBIt);
// End once we've reached the top of the LSQ
while (store_it != storeWBIt) {
// Move the index to one younger
store_it--;
assert(store_it->valid());
assert(store_it->instruction()->seqNum < load_inst->seqNum);
int store_size = store_it->size();
// Cache maintenance instructions go down via the store
// path but they carry no data and they shouldn't be
// considered for forwarding
if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
!(store_it->request()->mainRequest() &&
store_it->request()->mainRequest()->isCacheMaintenance())) {
assert(store_it->instruction()->effAddrValid());
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
auto req_s = req->mainRequest()->getVaddr();
auto req_e = req_s + req->mainRequest()->getSize();
auto st_s = store_it->instruction()->effAddr;
auto st_e = st_s + store_size;
bool store_has_lower_limit = req_s >= st_s;
bool store_has_upper_limit = req_e <= st_e;
bool lower_load_has_store_part = req_s < st_e;
bool upper_load_has_store_part = req_e > st_s;
auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
// If the store entry is not atomic (atomic does not have valid
// data), the store has all of the data needed, and
// the load is not LLSC, then
// we can forward data from the store to the load
if (!store_it->instruction()->isAtomic() &&
store_has_lower_limit && store_has_upper_limit &&
!req->mainRequest()->isLLSC()) {
const auto& store_req = store_it->request()->mainRequest();
coverage = store_req->isMasked() ?
AddrRangeCoverage::PartialAddrRangeCoverage :
AddrRangeCoverage::FullAddrRangeCoverage;
} else if (
// This is the partial store-load forwarding case where a store
// has only part of the load's data and the load isn't LLSC
(!req->mainRequest()->isLLSC() &&
((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part))) ||
// The load is LLSC, and the store has all or part of the
// load's data
(req->mainRequest()->isLLSC() &&
((store_has_lower_limit || upper_load_has_store_part) &&
(store_has_upper_limit || lower_load_has_store_part))) ||
// The store entry is atomic and has all or part of the load's
// data
(store_it->instruction()->isAtomic() &&
((store_has_lower_limit || upper_load_has_store_part) &&
(store_has_upper_limit || lower_load_has_store_part)))) {
coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
}
if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
// Get shift amount for offset into the store's data.
int shift_amt = req->mainRequest()->getVaddr() -
store_it->instruction()->effAddr;
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData =
new uint8_t[req->mainRequest()->getSize()];
}
if (store_it->isAllZeros())
memset(load_inst->memData, 0,
req->mainRequest()->getSize());
else
memcpy(load_inst->memData,
store_it->data() + shift_amt,
req->mainRequest()->getSize());
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
"addr %#x\n", store_it._idx,
req->mainRequest()->getVaddr());
PacketPtr data_pkt = new Packet(req->mainRequest(),
MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
// hardware transactional memory
// Store to load forwarding within a transaction
// This should be okay because the store will be sent to
// the memory subsystem and subsequently get added to the
// write set of the transaction. The write set has a stronger
// property than the read set, so the load doesn't necessarily
// have to be there.
assert(!req->mainRequest()->isHTMCmd());
if (load_inst->inHtmTransactionalState()) {
assert (!storeQueue[store_it._idx].completed());
assert (
storeQueue[store_it._idx].instruction()->
inHtmTransactionalState());
assert (
load_inst->getHtmTransactionUid() ==
storeQueue[store_it._idx].instruction()->
getHtmTransactionUid());
data_pkt->setHtmTransactional(
load_inst->getHtmTransactionUid());
DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
"pc=0x%lx - vaddr=0x%lx - "
"paddr=0x%lx - htmUid=%u\n",
load_inst->instAddr(),
data_pkt->req->hasVaddr() ?
data_pkt->req->getVaddr() : 0lu,
data_pkt->getAddr(),
load_inst->getHtmTransactionUid());
}
if (req->isAnyOutstandingRequest()) {
assert(req->_numOutstandingPackets > 0);
// There are memory requests packets in flight already.
// This may happen if the store was not complete the
// first time this load got executed. Signal the senderSate
// that response packets should be discarded.
req->discardSenderState();
}
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
this);
// We'll say this has a 1 cycle load-store forwarding latency
// for now.
// @todo: Need to make this a parameter.
cpu->schedule(wb, curTick());
// Don't need to do anything special for split loads.
++stats.forwLoads;
return NoFault;
} else if (
coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
// If it's already been written back, then don't worry about
// stalling on it.
if (store_it->completed()) {
panic("Should not check one of these");
continue;
}
// Must stall load and force it to retry, so long as it's the
// oldest load that needs to do so.
if (!stalled ||
(stalled &&
load_inst->seqNum <
loadQueue[stallingLoadIdx].instruction()->seqNum)) {
stalled = true;
stallingStoreIsn = store_it->instruction()->seqNum;
stallingLoadIdx = load_idx;
}
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
load_inst->clearIssued();
load_inst->effAddrValid(false);
++stats.rescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
"Store idx %i to load addr %#x\n",
store_it._idx, req->mainRequest()->getVaddr());
// Must discard the request.
req->discard();
load_req.setRequest(nullptr);
return NoFault;
}
}
}
// If there's no forwarding case, then go access memory
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
// Allocate memory if this is the first time a load is issued.
if (!load_inst->memData) {
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
}
// hardware transactional memory
if (req->mainRequest()->isHTMCmd()) {
// this is a simple sanity check
// the Ruby cache controller will set
// memData to 0x0ul if successful.
*load_inst->memData = (uint64_t) 0x1ull;
}
// For now, load throughput is constrained by the number of
// load FUs only, and loads do not consume a cache port (only
// stores do).
// @todo We should account for cache port contention
// and arbitrate between loads and stores.
// if we the cache is not blocked, do cache access
if (req->senderState() == nullptr) {
LQSenderState *state = new LQSenderState(
loadQueue.getIterator(load_idx));
state->isLoad = true;
state->inst = load_inst;
state->isSplit = req->isSplit();
req->senderState(state);
}
req->buildPackets();
req->sendPacketToCache();
if (!req->isSent())
iewStage->blockMemInst(load_inst);
return NoFault;
}
template <class Impl>
Fault
LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
{
    // The entry at store_idx must have been allocated by an earlier insert.
    auto &sq_entry = storeQueue[store_idx];
    assert(sq_entry.valid());

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
            "[sn:%llu]\n",
            store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
            sq_entry.instruction()->seqNum);

    // Attach the memory request to the store queue entry and record its size.
    sq_entry.setRequest(req);
    const unsigned write_size = req->_size;
    sq_entry.size() = write_size;

    // STORE_NO_DATA requests carry no payload; the entry is marked as
    // all-zeros instead of holding copied bytes.
    const bool no_data =
        req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
    sq_entry.isAllZeros() = no_data;
    assert(write_size <= SQEntry::DataSize || no_data);

    // Copy the payload into the store queue only when the request actually
    // provides one: cache-block-zero, cache maintenance and atomic requests
    // have no data to stage here.
    const bool has_payload =
        !(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
        !req->request()->isCacheMaintenance() &&
        !req->request()->isAtomic();
    if (has_payload)
        memcpy(sq_entry.data(), data, write_size);

    // Staging data into the store queue itself can never fault.
    return NoFault;
}
#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__

View File

@@ -49,6 +49,7 @@
#include "base/statistics.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/limits.hh"
#include "debug/MemDepUnit.hh"
@@ -85,8 +86,6 @@ class MemDepUnit
std::string _name;
public:
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::DynInstConstPtr DynInstConstPtr;
typedef typename Impl::O3CPU O3CPU;
/** Empty constructor. Must call init() prior to using in this case. */
@@ -117,22 +116,22 @@ class MemDepUnit
void setIQ(InstructionQueue<Impl> *iq_ptr);
/** Inserts a memory instruction. */
void insert(const DynInstPtr &inst);
void insert(const O3DynInstPtr &inst);
/** Inserts a non-speculative memory instruction. */
void insertNonSpec(const DynInstPtr &inst);
void insertNonSpec(const O3DynInstPtr &inst);
/** Inserts a barrier instruction. */
void insertBarrier(const DynInstPtr &barr_inst);
void insertBarrier(const O3DynInstPtr &barr_inst);
/** Indicate that an instruction has its registers ready. */
void regsReady(const DynInstPtr &inst);
void regsReady(const O3DynInstPtr &inst);
/** Indicate that a non-speculative instruction is ready. */
void nonSpecInstReady(const DynInstPtr &inst);
void nonSpecInstReady(const O3DynInstPtr &inst);
/** Reschedules an instruction to be re-executed. */
void reschedule(const DynInstPtr &inst);
void reschedule(const O3DynInstPtr &inst);
/** Replays all instructions that have been rescheduled by moving them to
* the ready list.
@@ -140,7 +139,7 @@ class MemDepUnit
void replay();
/** Notifies completion of an instruction. */
void completeInst(const DynInstPtr &inst);
void completeInst(const O3DynInstPtr &inst);
/** Squashes all instructions up until a given sequence number for a
* specific thread.
@@ -148,11 +147,11 @@ class MemDepUnit
void squash(const InstSeqNum &squashed_num, ThreadID tid);
/** Indicates an ordering violation between a store and a younger load. */
void violation(const DynInstPtr &store_inst,
const DynInstPtr &violating_load);
void violation(const O3DynInstPtr &store_inst,
const O3DynInstPtr &violating_load);
/** Issues the given instruction */
void issue(const DynInstPtr &inst);
void issue(const O3DynInstPtr &inst);
/** Debugging function to dump the lists of instructions. */
void dumpLists();
@@ -160,12 +159,12 @@ class MemDepUnit
private:
/** Completes a memory instruction. */
void completed(const DynInstPtr &inst);
void completed(const O3DynInstPtr &inst);
/** Wakes any dependents of a memory instruction. */
void wakeDependents(const DynInstPtr &inst);
void wakeDependents(const O3DynInstPtr &inst);
typedef typename std::list<DynInstPtr>::iterator ListIt;
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
class MemDepEntry;
@@ -179,7 +178,7 @@ class MemDepUnit
{
public:
/** Constructs a memory dependence entry. */
MemDepEntry(const DynInstPtr &new_inst)
MemDepEntry(const O3DynInstPtr &new_inst)
: inst(new_inst), regsReady(false), memDeps(0),
completed(false), squashed(false)
{
@@ -209,7 +208,7 @@ class MemDepUnit
std::string name() const { return "memdepentry"; }
/** The instruction being tracked. */
DynInstPtr inst;
O3DynInstPtr inst;
/** The iterator to the instruction's location inside the list. */
ListIt listIt;
@@ -235,10 +234,10 @@ class MemDepUnit
};
/** Finds the memory dependence entry in the hash map. */
inline MemDepEntryPtr &findInHash(const DynInstConstPtr& inst);
MemDepEntryPtr &findInHash(const O3DynInstConstPtr& inst);
/** Moves an entry to the ready list. */
inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
void moveToReady(MemDepEntryPtr &ready_inst_entry);
typedef std::unordered_map<InstSeqNum, MemDepEntryPtr, SNHash> MemDepHash;
@@ -248,10 +247,10 @@ class MemDepUnit
MemDepHash memDepHash;
/** A list of all instructions in the memory dependence unit. */
std::list<DynInstPtr> instList[O3MaxThreads];
std::list<O3DynInstPtr> instList[O3MaxThreads];
/** A list of all instructions that are going to be replayed. */
std::list<DynInstPtr> instsToReplay;
std::list<O3DynInstPtr> instsToReplay;
/** The memory dependence predictor. It is accessed upon new
* instructions being added to the IQ, and responds by telling
@@ -273,7 +272,7 @@ class MemDepUnit
bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); }
/** Inserts the SN of a barrier inst. to the list of tracked barriers */
void insertBarrierSN(const DynInstPtr &barr_inst);
void insertBarrierSN(const O3DynInstPtr &barr_inst);
/** Pointer to the IQ. */
InstructionQueue<Impl> *iqPtr;

View File

@@ -172,7 +172,7 @@ MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const O3DynInstPtr &barr_inst)
{
InstSeqNum barr_sn = barr_inst->seqNum;
@@ -205,7 +205,7 @@ MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::insert(const O3DynInstPtr &inst)
{
ThreadID tid = inst->threadNumber;
@@ -316,7 +316,7 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const O3DynInstPtr &inst)
{
insertBarrier(inst);
@@ -338,7 +338,7 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
MemDepUnit<MemDepPred, Impl>::insertBarrier(const O3DynInstPtr &barr_inst)
{
ThreadID tid = barr_inst->threadNumber;
@@ -361,7 +361,7 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::regsReady(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::regsReady(const O3DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Marking registers as ready for "
"instruction PC %s [sn:%lli].\n",
@@ -384,7 +384,7 @@ MemDepUnit<MemDepPred, Impl>::regsReady(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const O3DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Marking non speculative "
"instruction PC %s as ready [sn:%lli].\n",
@@ -397,7 +397,7 @@ MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::reschedule(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::reschedule(const O3DynInstPtr &inst)
{
instsToReplay.push_back(inst);
}
@@ -406,7 +406,7 @@ template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::replay()
{
DynInstPtr temp_inst;
O3DynInstPtr temp_inst;
// For now this replay function replays all waiting memory ops.
while (!instsToReplay.empty()) {
@@ -425,7 +425,7 @@ MemDepUnit<MemDepPred, Impl>::replay()
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::completed(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::completed(const O3DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n",
inst->pcState(), inst->seqNum);
@@ -449,7 +449,7 @@ MemDepUnit<MemDepPred, Impl>::completed(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::completeInst(const O3DynInstPtr &inst)
{
wakeDependents(inst);
completed(inst);
@@ -481,7 +481,7 @@ MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::wakeDependents(const O3DynInstPtr &inst)
{
// Only stores, atomics and barriers have dependents.
if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() &&
@@ -570,8 +570,8 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::violation(const DynInstPtr &store_inst,
const DynInstPtr &violating_load)
MemDepUnit<MemDepPred, Impl>::violation(const O3DynInstPtr &store_inst,
const O3DynInstPtr &violating_load)
{
DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
" load: %#x, store: %#x\n", violating_load->instAddr(),
@@ -582,7 +582,7 @@ MemDepUnit<MemDepPred, Impl>::violation(const DynInstPtr &store_inst,
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::issue(const DynInstPtr &inst)
MemDepUnit<MemDepPred, Impl>::issue(const O3DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
inst->instAddr(), inst->seqNum);
@@ -592,7 +592,7 @@ MemDepUnit<MemDepPred, Impl>::issue(const DynInstPtr &inst)
template <class MemDepPred, class Impl>
inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstConstPtr &inst)
MemDepUnit<MemDepPred, Impl>::findInHash(const O3DynInstConstPtr &inst)
{
MemDepHashIt hash_it = memDepHash.find(inst->seqNum);

View File

@@ -40,6 +40,7 @@
#include "base/callback.hh"
#include "base/output.hh"
#include "base/trace.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/reg_class.hh"
#include "debug/ElasticTrace.hh"
#include "mem/packet.hh"
@@ -124,21 +125,21 @@ ElasticTrace::regEtraceListeners()
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
"FetchRequest", &ElasticTrace::fetchReqTrace));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
DynInstConstPtr>(this, "Execute",
O3DynInstConstPtr>(this, "Execute",
&ElasticTrace::recordExecTick));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
DynInstConstPtr>(this, "ToCommit",
O3DynInstConstPtr>(this, "ToCommit",
&ElasticTrace::recordToCommTick));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
DynInstConstPtr>(this, "Rename",
O3DynInstConstPtr>(this, "Rename",
&ElasticTrace::updateRegDep));
listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
"SquashInRename", &ElasticTrace::removeRegDepMapEntry));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
DynInstConstPtr>(this, "Squash",
O3DynInstConstPtr>(this, "Squash",
&ElasticTrace::addSquashedInst));
listeners.push_back(new ProbeListenerArg<ElasticTrace,
DynInstConstPtr>(this, "Commit",
O3DynInstConstPtr>(this, "Commit",
&ElasticTrace::addCommittedInst));
allProbesReg = true;
}
@@ -166,7 +167,7 @@ ElasticTrace::fetchReqTrace(const RequestPtr &req)
}
void
ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
ElasticTrace::recordExecTick(const O3DynInstConstPtr& dyn_inst)
{
// In a corner case, a retired instruction is propagated backward to the
@@ -203,7 +204,7 @@ ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
}
void
ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
ElasticTrace::recordToCommTick(const O3DynInstConstPtr& dyn_inst)
{
// If tracing has just been enabled then the instruction at this stage of
// execution is far enough that we cannot gather info about its past like
@@ -224,7 +225,7 @@ ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
}
void
ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
ElasticTrace::updateRegDep(const O3DynInstConstPtr& dyn_inst)
{
// Get the sequence number of the instruction
InstSeqNum seq_num = dyn_inst->seqNum;
@@ -303,7 +304,7 @@ ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
}
void
ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
ElasticTrace::addSquashedInst(const O3DynInstConstPtr& head_inst)
{
// If the squashed instruction was squashed before being processed by
// execute stage then it will not be in the temporary store. In this case
@@ -331,7 +332,7 @@ ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
}
void
ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
ElasticTrace::addCommittedInst(const O3DynInstConstPtr& head_inst)
{
DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
head_inst->seqNum);
@@ -390,7 +391,7 @@ ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
}
void
ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
ElasticTrace::addDepTraceRecord(const O3DynInstConstPtr& head_inst,
InstExecInfo* exec_info_ptr, bool commit)
{
// Create a record to assign dynamic intruction related fields.
@@ -652,7 +653,7 @@ ElasticTrace::hasCompCompleted(TraceInfo* past_record,
}
void
ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
ElasticTrace::clearTempStoreUntil(const O3DynInstConstPtr& head_inst)
{
// Clear from temp store starting with the execution info object
// corresponding the head_inst and continue clearing by decrementing the

View File

@@ -50,7 +50,7 @@
#include <unordered_map>
#include <utility>
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/impl.hh"
#include "mem/request.hh"
#include "params/ElasticTrace.hh"
@@ -85,8 +85,6 @@ class ElasticTrace : public ProbeListenerObject
{
public:
typedef typename O3CPUImpl::DynInstPtr DynInstPtr;
typedef typename O3CPUImpl::DynInstConstPtr DynInstConstPtr;
typedef typename std::pair<InstSeqNum, RegIndex> SeqNumRegPair;
/** Trace record types corresponding to instruction node types */
@@ -129,7 +127,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void recordExecTick(const DynInstConstPtr& dyn_inst);
void recordExecTick(const O3DynInstConstPtr& dyn_inst);
/**
* Populate the timestamp field in an InstExecInfo object for an
@@ -138,7 +136,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void recordToCommTick(const DynInstConstPtr& dyn_inst);
void recordToCommTick(const O3DynInstConstPtr& dyn_inst);
/**
* Record a Read After Write physical register dependency if there has
@@ -149,7 +147,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param dyn_inst pointer to dynamic instruction in flight
*/
void updateRegDep(const DynInstConstPtr& dyn_inst);
void updateRegDep(const O3DynInstConstPtr& dyn_inst);
/**
* When an instruction gets squashed the destination register mapped to it
@@ -166,14 +164,14 @@ class ElasticTrace : public ProbeListenerObject
*
* @param head_inst pointer to dynamic instruction to be squashed
*/
void addSquashedInst(const DynInstConstPtr& head_inst);
void addSquashedInst(const O3DynInstConstPtr& head_inst);
/**
* Add an instruction that is at the head of the ROB and is committed.
*
* @param head_inst pointer to dynamic instruction to be committed
*/
void addCommittedInst(const DynInstConstPtr& head_inst);
void addCommittedInst(const O3DynInstConstPtr& head_inst);
/** Event to trigger registering this listener for all probe points. */
EventFunctionWrapper regEtraceListenersEvent;
@@ -379,7 +377,7 @@ class ElasticTrace : public ProbeListenerObject
* @param exec_info_ptr Pointer to InstExecInfo for that instruction
* @param commit True if instruction is committed, false if squashed
*/
void addDepTraceRecord(const DynInstConstPtr& head_inst,
void addDepTraceRecord(const O3DynInstConstPtr& head_inst,
InstExecInfo* exec_info_ptr, bool commit);
/**
@@ -388,7 +386,7 @@ class ElasticTrace : public ProbeListenerObject
*
* @param head_inst pointer to dynamic instruction
*/
void clearTempStoreUntil(const DynInstConstPtr& head_inst);
void clearTempStoreUntil(const O3DynInstConstPtr& head_inst);
/**
* Calculate the computational delay between an instruction and a

View File

@@ -38,16 +38,17 @@
#include "cpu/o3/probe/simple_trace.hh"
#include "base/trace.hh"
#include "cpu/o3/dyn_inst.hh"
#include "debug/SimpleTrace.hh"
void SimpleTrace::traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst)
void SimpleTrace::traceCommit(const O3DynInstConstPtr& dynInst)
{
DPRINTFR(SimpleTrace, "[%s]: Commit 0x%08x %s.\n", name(),
dynInst->instAddr(),
dynInst->staticInst->disassemble(dynInst->instAddr()));
}
void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst)
void SimpleTrace::traceFetch(const O3DynInstConstPtr& dynInst)
{
DPRINTFR(SimpleTrace, "[%s]: Fetch 0x%08x %s.\n", name(),
dynInst->instAddr(),
@@ -57,7 +58,7 @@ void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst)
void SimpleTrace::regProbeListeners()
{
typedef ProbeListenerArg<SimpleTrace,
O3CPUImpl::DynInstConstPtr> DynInstListener;
O3DynInstConstPtr> DynInstListener;
listeners.push_back(new DynInstListener(this, "Commit",
&SimpleTrace::traceCommit));
listeners.push_back(new DynInstListener(this, "Fetch",

View File

@@ -44,7 +44,7 @@
#ifndef __CPU_O3_PROBE_SIMPLE_TRACE_HH__
#define __CPU_O3_PROBE_SIMPLE_TRACE_HH__
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/impl.hh"
#include "params/SimpleTrace.hh"
#include "sim/probe/probe.hh"
@@ -69,8 +69,8 @@ class SimpleTrace : public ProbeListenerObject
}
private:
void traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst);
void traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst);
void traceFetch(const O3DynInstConstPtr& dynInst);
void traceCommit(const O3DynInstConstPtr& dynInst);
};
#endif//__CPU_O3_PROBE_SIMPLE_TRACE_HH__

View File

@@ -48,6 +48,7 @@
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/o3/commit.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/free_list.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/limits.hh"
@@ -73,7 +74,6 @@ class DefaultRename
{
public:
// Typedefs from the Impl.
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DecodeStruct DecodeStruct;
typedef typename Impl::RenameStruct RenameStruct;
@@ -83,7 +83,7 @@ class DefaultRename
// be added to the front of the queue, which is the only reason for
// using a deque instead of a queue. (Most other stages use a
// queue)
typedef std::deque<DynInstPtr> InstQueue;
typedef std::deque<O3DynInstPtr> InstQueue;
public:
/** Overall rename status. Used to determine if the CPU can
@@ -117,7 +117,7 @@ class DefaultRename
/** Probe points. */
typedef typename std::pair<InstSeqNum, PhysRegIdPtr> SeqNumRegPair;
/** To probe when register renaming for an instruction is complete */
ProbePointArg<DynInstPtr> *ppRename;
ProbePointArg<O3DynInstPtr> *ppRename;
/**
* To probe when an instruction is squashed and the register mapping
* for it needs to be undone
@@ -248,22 +248,22 @@ class DefaultRename
void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid);
/** Renames the source registers of an instruction. */
inline void renameSrcRegs(const DynInstPtr &inst, ThreadID tid);
void renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid);
/** Renames the destination registers of an instruction. */
inline void renameDestRegs(const DynInstPtr &inst, ThreadID tid);
void renameDestRegs(const O3DynInstPtr &inst, ThreadID tid);
/** Calculates the number of free ROB entries for a specific thread. */
inline int calcFreeROBEntries(ThreadID tid);
int calcFreeROBEntries(ThreadID tid);
/** Calculates the number of free IQ entries for a specific thread. */
inline int calcFreeIQEntries(ThreadID tid);
int calcFreeIQEntries(ThreadID tid);
/** Calculates the number of free LQ entries for a specific thread. */
inline int calcFreeLQEntries(ThreadID tid);
int calcFreeLQEntries(ThreadID tid);
/** Calculates the number of free SQ entries for a specific thread. */
inline int calcFreeSQEntries(ThreadID tid);
int calcFreeSQEntries(ThreadID tid);
/** Returns the number of valid instructions coming from decode. */
unsigned validInsts();
@@ -417,7 +417,7 @@ class DefaultRename
Stalls stalls[O3MaxThreads];
/** The serialize instruction that rename has stalled on. */
DynInstPtr serializeInst[O3MaxThreads];
O3DynInstPtr serializeInst[O3MaxThreads];
/** Records if rename needs to serialize on the next instruction for any
* thread.

View File

@@ -177,7 +177,8 @@ template <class Impl>
void
DefaultRename<Impl>::regProbePoints()
{
ppRename = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Rename");
ppRename = new ProbePointArg<O3DynInstPtr>(
cpu->getProbeManager(), "Rename");
ppSquashInRename = new ProbePointArg<SeqNumRegPair>(cpu->getProbeManager(),
"SquashInRename");
}
@@ -612,11 +613,12 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
assert(!insts_to_rename.empty());
DynInstPtr inst = insts_to_rename.front();
O3DynInstPtr inst = insts_to_rename.front();
//For all kind of instructions, check ROB and IQ first
//For load instruction, check LQ size and take into account the inflight loads
//For store instruction, check SQ size and take into account the inflight stores
//For all kind of instructions, check ROB and IQ first For load
//instruction, check LQ size and take into account the inflight loads
//For store instruction, check SQ size and take into account the
//inflight stores
if (inst->isLoad()) {
if (calcFreeLQEntries(tid) <= 0) {
@@ -774,7 +776,7 @@ template<class Impl>
void
DefaultRename<Impl>::skidInsert(ThreadID tid)
{
DynInstPtr inst = NULL;
O3DynInstPtr inst = NULL;
while (!insts[tid].empty()) {
inst = insts[tid].front();
@@ -811,7 +813,7 @@ DefaultRename<Impl>::sortInsts()
{
int insts_from_decode = fromDecode->size;
for (int i = 0; i < insts_from_decode; ++i) {
const DynInstPtr &inst = fromDecode->insts[i];
const O3DynInstPtr &inst = fromDecode->insts[i];
insts[inst->threadNumber].push_back(inst);
#if TRACING_ON
if (Debug::O3PipeView) {
@@ -1035,7 +1037,7 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid)
template <class Impl>
inline void
DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
DefaultRename<Impl>::renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid)
{
ThreadContext *tc = inst->tcBase();
UnifiedRenameMap *map = renameMap[tid];
@@ -1102,7 +1104,7 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
template <class Impl>
inline void
DefaultRename<Impl>::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
DefaultRename<Impl>::renameDestRegs(const O3DynInstPtr &inst, ThreadID tid)
{
ThreadContext *tc = inst->tcBase();
UnifiedRenameMap *map = renameMap[tid];
@@ -1369,7 +1371,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(ThreadID tid)
DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to "
"unblocking.\n", tid);
DynInstPtr serial_inst = serializeInst[tid];
O3DynInstPtr serial_inst = serializeInst[tid];
renameStatus[tid] = Unblocking;

View File

@@ -60,10 +60,9 @@ class ROB
public:
//Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef std::pair<RegIndex, RegIndex> UnmapInfo;
typedef typename std::list<DynInstPtr>::iterator InstIt;
typedef typename std::list<O3DynInstPtr>::iterator InstIt;
/** Possible ROB statuses. */
enum Status
@@ -105,36 +104,36 @@ class ROB
* ROB for the new instruction.
* @param inst The instruction being inserted into the ROB.
*/
void insertInst(const DynInstPtr &inst);
void insertInst(const O3DynInstPtr &inst);
/** Returns pointer to the head instruction within the ROB. There is
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the head of the ROB.
*/
// DynInstPtr readHeadInst();
// O3DynInstPtr readHeadInst();
/** Returns a pointer to the head instruction of a specific thread within
* the ROB.
* @return Pointer to the DynInst that is at the head of the ROB.
*/
const DynInstPtr &readHeadInst(ThreadID tid);
const O3DynInstPtr &readHeadInst(ThreadID tid);
/** Returns a pointer to the instruction with the given sequence if it is
* in the ROB.
*/
DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
O3DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
/** Returns pointer to the tail instruction within the ROB. There is
* no guarantee as to the return value if the ROB is empty.
* @retval Pointer to the DynInst that is at the tail of the ROB.
*/
// DynInstPtr readTailInst();
// O3DynInstPtr readTailInst();
/** Returns a pointer to the tail instruction of a specific thread within
* the ROB.
* @return Pointer to the DynInst that is at the tail of the ROB.
*/
DynInstPtr readTailInst(ThreadID tid);
O3DynInstPtr readTailInst(ThreadID tid);
/** Retires the head instruction, removing it from the ROB. */
// void retireHead();
@@ -277,7 +276,7 @@ class ROB
unsigned maxEntries[O3MaxThreads];
/** ROB List of Instructions */
std::list<DynInstPtr> instList[O3MaxThreads];
std::list<O3DynInstPtr> instList[O3MaxThreads];
/** Number of instructions that can be squashed in a single cycle. */
unsigned squashWidth;
@@ -308,7 +307,7 @@ class ROB
int numInstsInROB;
/** Dummy instruction returned if there are no insts left. */
DynInstPtr dummyInst;
O3DynInstPtr dummyInst;
private:
/** The sequence number of the squashed instruction. */

View File

@@ -200,7 +200,7 @@ ROB<Impl>::countInsts(ThreadID tid)
template <class Impl>
void
ROB<Impl>::insertInst(const DynInstPtr &inst)
ROB<Impl>::insertInst(const O3DynInstPtr &inst)
{
assert(inst);
@@ -246,7 +246,7 @@ ROB<Impl>::retireHead(ThreadID tid)
// Get the head ROB instruction by copying it and remove it from the list
InstIt head_it = instList[tid].begin();
DynInstPtr head_inst = std::move(*head_it);
O3DynInstPtr head_inst = std::move(*head_it);
instList[tid].erase(head_it);
assert(head_inst->readyToCommit());
@@ -428,7 +428,7 @@ ROB<Impl>::updateHead()
InstIt head_thread = instList[tid].begin();
DynInstPtr head_inst = (*head_thread);
O3DynInstPtr head_inst = (*head_thread);
assert(head_inst != 0);
@@ -513,7 +513,7 @@ ROB<Impl>::squash(InstSeqNum squash_num, ThreadID tid)
}
template <class Impl>
const typename Impl::DynInstPtr&
const O3DynInstPtr&
ROB<Impl>::readHeadInst(ThreadID tid)
{
if (threadEntries[tid] != 0) {
@@ -528,7 +528,7 @@ ROB<Impl>::readHeadInst(ThreadID tid)
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
ROB<Impl>::readTailInst(ThreadID tid)
{
InstIt tail_thread = instList[tid].end();
@@ -546,7 +546,7 @@ ROB<Impl>::ROBStats::ROBStats(Stats::Group *parent)
}
template <class Impl>
typename Impl::DynInstPtr
O3DynInstPtr
ROB<Impl>::findInst(ThreadID tid, InstSeqNum squash_inst)
{
for (InstIt it = instList[tid].begin(); it != instList[tid].end(); it++) {