cpu: Drop the DynInstPtr types from O3CPUImpl.
Aside from basic code editing, this also moves some methods from the .hh files to the _impl.hh files. It also changes the Checker CPU template to take the DynInstPtr type directly instead of through Impl since that was the only type it used anyway. Finally, it sets up a header file which predeclares the O3DynInstPtr and O3DynInstConstPtr types so that, to break circular dependencies, they can be used without also including the BaseO3DynInst class definition. Change-Id: I5ca6af38ec13e6e820abcdb3748412e4f7fc1c78 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42101 Reviewed-by: Nathanael Premillieu <nathanael.premillieu@huawei.com> Maintainer: Gabe Black <gabe.black@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -51,6 +51,7 @@
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/exec_context.hh"
|
||||
#include "cpu/inst_res.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/pc_event.hh"
|
||||
#include "cpu/simple_thread.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
@@ -559,12 +560,9 @@ class CheckerCPU : public BaseCPU, public ExecContext
|
||||
* template instantiations of the Checker must be placed at the bottom
|
||||
* of checker/cpu.cc.
|
||||
*/
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
class Checker : public CheckerCPU
|
||||
{
|
||||
private:
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
public:
|
||||
Checker(const Params &p)
|
||||
: CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL)
|
||||
|
||||
@@ -59,9 +59,9 @@
|
||||
#include "sim/sim_object.hh"
|
||||
#include "sim/stats.hh"
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::advancePC(const Fault &fault)
|
||||
Checker<DynInstPtr>::advancePC(const Fault &fault)
|
||||
{
|
||||
if (fault != NoFault) {
|
||||
curMacroStaticInst = nullStaticInstPtr;
|
||||
@@ -80,9 +80,9 @@ Checker<Impl>::advancePC(const Fault &fault)
|
||||
}
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::handlePendingInt()
|
||||
Checker<DynInstPtr>::handlePendingInt()
|
||||
{
|
||||
DPRINTF(Checker, "IRQ detected at PC: %s with %d insts in buffer\n",
|
||||
thread->pcState(), instList.size());
|
||||
@@ -114,9 +114,9 @@ Checker<Impl>::handlePendingInt()
|
||||
curMacroStaticInst = nullStaticInstPtr;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::verify(const DynInstPtr &completed_inst)
|
||||
Checker<DynInstPtr>::verify(const DynInstPtr &completed_inst)
|
||||
{
|
||||
DynInstPtr inst;
|
||||
|
||||
@@ -428,22 +428,19 @@ Checker<Impl>::verify(const DynInstPtr &completed_inst)
|
||||
unverifiedInst = NULL;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::switchOut()
|
||||
Checker<DynInstPtr>::switchOut()
|
||||
{
|
||||
instList.clear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
Checker<Impl>::takeOverFrom(BaseCPU *oldCPU)
|
||||
{
|
||||
}
|
||||
template <class DynInstPtr>
|
||||
void Checker<DynInstPtr>::takeOverFrom(BaseCPU *oldCPU) {}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::validateInst(const DynInstPtr &inst)
|
||||
Checker<DynInstPtr>::validateInst(const DynInstPtr &inst)
|
||||
{
|
||||
if (inst->instAddr() != thread->instAddr()) {
|
||||
warn("%lli: PCs do not match! Inst: %s, checker: %s",
|
||||
@@ -462,9 +459,9 @@ Checker<Impl>::validateInst(const DynInstPtr &inst)
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::validateExecution(const DynInstPtr &inst)
|
||||
Checker<DynInstPtr>::validateExecution(const DynInstPtr &inst)
|
||||
{
|
||||
InstResult checker_val;
|
||||
InstResult inst_val;
|
||||
@@ -555,9 +552,9 @@ Checker<Impl>::validateExecution(const DynInstPtr &inst)
|
||||
// This function is weird, if it is called it means the Checker and
|
||||
// O3 have diverged, so panic is called for now. It may be useful
|
||||
// to resynch states and continue if the divergence is a false positive
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::validateState()
|
||||
Checker<DynInstPtr>::validateState()
|
||||
{
|
||||
if (updateThisCycle) {
|
||||
// Change this back to warn if divergences end up being false positives
|
||||
@@ -580,10 +577,10 @@ Checker<Impl>::validateState()
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::copyResult(const DynInstPtr &inst,
|
||||
const InstResult& mismatch_val, int start_idx)
|
||||
Checker<DynInstPtr>::copyResult(
|
||||
const DynInstPtr &inst, const InstResult& mismatch_val, int start_idx)
|
||||
{
|
||||
// We've already popped one dest off the queue,
|
||||
// so do the fix-up then start with the next dest reg;
|
||||
@@ -657,9 +654,9 @@ Checker<Impl>::copyResult(const DynInstPtr &inst,
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::dumpAndExit(const DynInstPtr &inst)
|
||||
Checker<DynInstPtr>::dumpAndExit(const DynInstPtr &inst)
|
||||
{
|
||||
cprintf("Error detected, instruction information:\n");
|
||||
cprintf("PC:%s, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n"
|
||||
@@ -673,9 +670,9 @@ Checker<Impl>::dumpAndExit(const DynInstPtr &inst)
|
||||
CheckerCPU::dumpAndExit();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
Checker<Impl>::dumpInsts()
|
||||
Checker<DynInstPtr>::dumpInsts()
|
||||
{
|
||||
int num = 0;
|
||||
|
||||
|
||||
@@ -43,4 +43,4 @@
|
||||
#include "cpu/checker/cpu_impl.hh"
|
||||
|
||||
template
|
||||
class Checker<O3CPUImpl>;
|
||||
class Checker<O3DynInstPtr>;
|
||||
|
||||
@@ -48,10 +48,10 @@
|
||||
/**
|
||||
* Specific non-templated derived class used for SimObject configuration.
|
||||
*/
|
||||
class O3Checker : public Checker<O3CPUImpl>
|
||||
class O3Checker : public Checker<O3DynInstPtr>
|
||||
{
|
||||
public:
|
||||
O3Checker(const Params &p) : Checker<O3CPUImpl>(p)
|
||||
O3Checker(const Params &p) : Checker<O3DynInstPtr>(p)
|
||||
{
|
||||
// The checker should check all instructions executed by the main
|
||||
// cpu and therefore any parameters for early exit don't make much
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
#include "arch/types.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "sim/faults.hh"
|
||||
|
||||
@@ -54,11 +55,9 @@
|
||||
template<class Impl>
|
||||
struct DefaultFetchDefaultDecode
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[O3MaxWidth];
|
||||
O3DynInstPtr insts[O3MaxWidth];
|
||||
Fault fetchFault;
|
||||
InstSeqNum fetchFaultSN;
|
||||
bool clearFetchFault;
|
||||
@@ -68,34 +67,28 @@ struct DefaultFetchDefaultDecode
|
||||
template<class Impl>
|
||||
struct DefaultDecodeDefaultRename
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[O3MaxWidth];
|
||||
O3DynInstPtr insts[O3MaxWidth];
|
||||
};
|
||||
|
||||
/** Struct that defines the information passed from rename to IEW. */
|
||||
template<class Impl>
|
||||
struct DefaultRenameDefaultIEW
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[O3MaxWidth];
|
||||
O3DynInstPtr insts[O3MaxWidth];
|
||||
};
|
||||
|
||||
/** Struct that defines the information passed from IEW to commit. */
|
||||
template<class Impl>
|
||||
struct DefaultIEWDefaultCommit
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[O3MaxWidth];
|
||||
DynInstPtr mispredictInst[O3MaxThreads];
|
||||
O3DynInstPtr insts[O3MaxWidth];
|
||||
O3DynInstPtr mispredictInst[O3MaxThreads];
|
||||
Addr mispredPC[O3MaxThreads];
|
||||
InstSeqNum squashedSeqNum[O3MaxThreads];
|
||||
TheISA::PCState pc[O3MaxThreads];
|
||||
@@ -109,23 +102,20 @@ struct DefaultIEWDefaultCommit
|
||||
template<class Impl>
|
||||
struct IssueStruct
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[O3MaxWidth];
|
||||
O3DynInstPtr insts[O3MaxWidth];
|
||||
};
|
||||
|
||||
/** Struct that defines all backwards communication. */
|
||||
template<class Impl>
|
||||
struct TimeBufStruct
|
||||
{
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
struct decodeComm
|
||||
struct DecodeComm
|
||||
{
|
||||
TheISA::PCState nextPC;
|
||||
DynInstPtr mispredictInst;
|
||||
DynInstPtr squashInst;
|
||||
O3DynInstPtr mispredictInst;
|
||||
O3DynInstPtr squashInst;
|
||||
InstSeqNum doneSeqNum;
|
||||
Addr mispredPC;
|
||||
uint64_t branchAddr;
|
||||
@@ -136,15 +126,13 @@ struct TimeBufStruct
|
||||
bool branchTaken;
|
||||
};
|
||||
|
||||
decodeComm decodeInfo[O3MaxThreads];
|
||||
DecodeComm decodeInfo[O3MaxThreads];
|
||||
|
||||
struct renameComm
|
||||
{
|
||||
};
|
||||
struct RenameComm {};
|
||||
|
||||
renameComm renameInfo[O3MaxThreads];
|
||||
RenameComm renameInfo[O3MaxThreads];
|
||||
|
||||
struct iewComm
|
||||
struct IewComm
|
||||
{
|
||||
// Also eventually include skid buffer space.
|
||||
unsigned freeIQEntries;
|
||||
@@ -161,9 +149,9 @@ struct TimeBufStruct
|
||||
bool usedLSQ;
|
||||
};
|
||||
|
||||
iewComm iewInfo[O3MaxThreads];
|
||||
IewComm iewInfo[O3MaxThreads];
|
||||
|
||||
struct commitComm
|
||||
struct CommitComm
|
||||
{
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
// This code has been re-structured for better packing of variables
|
||||
@@ -184,14 +172,14 @@ struct TimeBufStruct
|
||||
|
||||
/// Provide fetch the instruction that mispredicted, if this
|
||||
/// pointer is not null, a misprediction occurred
|
||||
DynInstPtr mispredictInst; // *F
|
||||
O3DynInstPtr mispredictInst; // *F
|
||||
|
||||
/// Instruction that caused a non-mispredict squash
|
||||
DynInstPtr squashInst; // *F
|
||||
O3DynInstPtr squashInst; // *F
|
||||
|
||||
/// Hack for now to send back a strictly ordered access to the
|
||||
/// IEW stage.
|
||||
DynInstPtr strictlyOrderedLoad; // *I
|
||||
O3DynInstPtr strictlyOrderedLoad; // *I
|
||||
|
||||
/// Communication specifically to the IQ to tell the IQ that it can
|
||||
/// schedule a non-speculative instruction.
|
||||
@@ -227,7 +215,7 @@ struct TimeBufStruct
|
||||
|
||||
};
|
||||
|
||||
commitComm commitInfo[O3MaxThreads];
|
||||
CommitComm commitInfo[O3MaxThreads];
|
||||
|
||||
bool decodeBlock[O3MaxThreads];
|
||||
bool decodeUnblock[O3MaxThreads];
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/exetrace.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/rename_map.hh"
|
||||
@@ -87,7 +88,6 @@ class DefaultCommit
|
||||
public:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::TimeStruct TimeStruct;
|
||||
typedef typename Impl::FetchStruct FetchStruct;
|
||||
typedef typename Impl::IEWStruct IEWStruct;
|
||||
@@ -126,10 +126,10 @@ class DefaultCommit
|
||||
CommitPolicy commitPolicy;
|
||||
|
||||
/** Probe Points. */
|
||||
ProbePointArg<DynInstPtr> *ppCommit;
|
||||
ProbePointArg<DynInstPtr> *ppCommitStall;
|
||||
ProbePointArg<O3DynInstPtr> *ppCommit;
|
||||
ProbePointArg<O3DynInstPtr> *ppCommitStall;
|
||||
/** To probe when an instruction is squashed */
|
||||
ProbePointArg<DynInstPtr> *ppSquash;
|
||||
ProbePointArg<O3DynInstPtr> *ppSquash;
|
||||
|
||||
/** Mark the thread as processing a trap. */
|
||||
void processTrapEvent(ThreadID tid);
|
||||
@@ -277,7 +277,7 @@ class DefaultCommit
|
||||
* @param tid ID of the thread to squash.
|
||||
* @param head_inst Instruction that requested the squash.
|
||||
*/
|
||||
void squashAfter(ThreadID tid, const DynInstPtr &head_inst);
|
||||
void squashAfter(ThreadID tid, const O3DynInstPtr &head_inst);
|
||||
|
||||
/** Handles processing an interrupt. */
|
||||
void handleInterrupt();
|
||||
@@ -291,7 +291,7 @@ class DefaultCommit
|
||||
/** Tries to commit the head ROB instruction passed in.
|
||||
* @param head_inst The instruction to be committed.
|
||||
*/
|
||||
bool commitHead(const DynInstPtr &head_inst, unsigned inst_num);
|
||||
bool commitHead(const O3DynInstPtr &head_inst, unsigned inst_num);
|
||||
|
||||
/** Gets instructions from rename and inserts them into the ROB. */
|
||||
void getInsts();
|
||||
@@ -385,7 +385,7 @@ class DefaultCommit
|
||||
* that caused a squash since this needs to be passed to the fetch
|
||||
* stage once squashing starts.
|
||||
*/
|
||||
DynInstPtr squashAfterInst[O3MaxThreads];
|
||||
O3DynInstPtr squashAfterInst[O3MaxThreads];
|
||||
|
||||
/** Priority List used for Commit Policy */
|
||||
std::list<ThreadID> priority_list;
|
||||
@@ -472,7 +472,7 @@ class DefaultCommit
|
||||
bool avoidQuiesceLiveLock;
|
||||
|
||||
/** Updates commit stats based on this instruction. */
|
||||
void updateComInstStats(const DynInstPtr &inst);
|
||||
void updateComInstStats(const O3DynInstPtr &inst);
|
||||
|
||||
// HTM
|
||||
int htmStarts[O3MaxThreads];
|
||||
|
||||
@@ -54,6 +54,7 @@
|
||||
#include "cpu/exetrace.hh"
|
||||
#include "cpu/null_static_inst.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/thread_state.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
@@ -140,9 +141,12 @@ template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::regProbePoints()
|
||||
{
|
||||
ppCommit = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Commit");
|
||||
ppCommitStall = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "CommitStall");
|
||||
ppSquash = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Squash");
|
||||
ppCommit = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Commit");
|
||||
ppCommitStall = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "CommitStall");
|
||||
ppSquash = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Squash");
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -653,7 +657,7 @@ DefaultCommit<Impl>::squashFromSquashAfter(ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::squashAfter(ThreadID tid, const DynInstPtr &head_inst)
|
||||
DefaultCommit<Impl>::squashAfter(ThreadID tid, const O3DynInstPtr &head_inst)
|
||||
{
|
||||
DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%llu]\n",
|
||||
tid, head_inst->seqNum);
|
||||
@@ -713,14 +717,14 @@ DefaultCommit<Impl>::tick()
|
||||
// will be active.
|
||||
_nextStatus = Active;
|
||||
|
||||
GEM5_VAR_USED const DynInstPtr &inst = rob->readHeadInst(tid);
|
||||
GEM5_VAR_USED const O3DynInstPtr &inst = rob->readHeadInst(tid);
|
||||
|
||||
DPRINTF(Commit,"[tid:%i] Instruction [sn:%llu] PC %s is head of"
|
||||
" ROB and ready to commit\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
|
||||
} else if (!rob->isEmpty(tid)) {
|
||||
const DynInstPtr &inst = rob->readHeadInst(tid);
|
||||
const O3DynInstPtr &inst = rob->readHeadInst(tid);
|
||||
|
||||
ppCommitStall->notify(inst);
|
||||
|
||||
@@ -1001,7 +1005,7 @@ DefaultCommit<Impl>::commitInsts()
|
||||
|
||||
unsigned num_committed = 0;
|
||||
|
||||
DynInstPtr head_inst;
|
||||
O3DynInstPtr head_inst;
|
||||
|
||||
// Commit as many instructions as possible until the commit bandwidth
|
||||
// limit is reached, or it becomes impossible to commit any more.
|
||||
@@ -1192,7 +1196,8 @@ DefaultCommit<Impl>::commitInsts()
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
|
||||
DefaultCommit<Impl>::commitHead(
|
||||
const O3DynInstPtr &head_inst, unsigned inst_num)
|
||||
{
|
||||
assert(head_inst);
|
||||
|
||||
@@ -1391,7 +1396,7 @@ DefaultCommit<Impl>::getInsts()
|
||||
int insts_to_process = std::min((int)renameWidth, fromRename->size);
|
||||
|
||||
for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) {
|
||||
const DynInstPtr &inst = fromRename->insts[inst_num];
|
||||
const O3DynInstPtr &inst = fromRename->insts[inst_num];
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
if (!inst->isSquashed() &&
|
||||
@@ -1438,7 +1443,7 @@ DefaultCommit<Impl>::markCompletedInsts()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::updateComInstStats(const DynInstPtr &inst)
|
||||
DefaultCommit<Impl>::updateComInstStats(const O3DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
@@ -1583,7 +1588,7 @@ DefaultCommit<Impl>::oldestReady()
|
||||
|
||||
if (rob->isHeadReady(tid)) {
|
||||
|
||||
const DynInstPtr &head_inst = rob->readHeadInst(tid);
|
||||
const O3DynInstPtr &head_inst = rob->readHeadInst(tid);
|
||||
|
||||
if (first) {
|
||||
oldest = tid;
|
||||
|
||||
@@ -136,7 +136,7 @@ FullO3CPU<Impl>::FullO3CPU(const DerivO3CPUParams ¶ms)
|
||||
|
||||
if (params.checker) {
|
||||
BaseCPU *temp_checker = params.checker;
|
||||
checker = dynamic_cast<Checker<Impl> *>(temp_checker);
|
||||
checker = dynamic_cast<Checker<O3DynInstPtr> *>(temp_checker);
|
||||
checker->setIcachePort(&this->fetch.getInstPort());
|
||||
checker->setSystem(params.system);
|
||||
} else {
|
||||
@@ -378,8 +378,11 @@ FullO3CPU<Impl>::regProbePoints()
|
||||
{
|
||||
BaseCPU::regProbePoints();
|
||||
|
||||
ppInstAccessComplete = new ProbePointArg<PacketPtr>(getProbeManager(), "InstAccessComplete");
|
||||
ppDataAccessComplete = new ProbePointArg<std::pair<DynInstPtr, PacketPtr> >(getProbeManager(), "DataAccessComplete");
|
||||
ppInstAccessComplete = new ProbePointArg<PacketPtr>(
|
||||
getProbeManager(), "InstAccessComplete");
|
||||
ppDataAccessComplete = new ProbePointArg<
|
||||
std::pair<O3DynInstPtr, PacketPtr>>(
|
||||
getProbeManager(), "DataAccessComplete");
|
||||
|
||||
fetch.regProbePoints();
|
||||
rename.regProbePoints();
|
||||
@@ -1501,7 +1504,7 @@ FullO3CPU<Impl>::squashFromTC(ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
typename FullO3CPU<Impl>::ListIt
|
||||
FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
|
||||
FullO3CPU<Impl>::addInst(const O3DynInstPtr &inst)
|
||||
{
|
||||
instList.push_back(inst);
|
||||
|
||||
@@ -1510,7 +1513,7 @@ FullO3CPU<Impl>::addInst(const DynInstPtr &inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
|
||||
FullO3CPU<Impl>::instDone(ThreadID tid, const O3DynInstPtr &inst)
|
||||
{
|
||||
// Keep an instruction count.
|
||||
if (!inst->isMicroop() || inst->isLastMicroop()) {
|
||||
@@ -1530,7 +1533,7 @@ FullO3CPU<Impl>::instDone(ThreadID tid, const DynInstPtr &inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::removeFrontInst(const DynInstPtr &inst)
|
||||
FullO3CPU<Impl>::removeFrontInst(const O3DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s "
|
||||
"[sn:%lli]\n",
|
||||
@@ -1686,7 +1689,7 @@ FullO3CPU<Impl>::dumpInsts()
|
||||
/*
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::wakeDependents(const DynInstPtr &inst)
|
||||
FullO3CPU<Impl>::wakeDependents(const O3DynInstPtr &inst)
|
||||
{
|
||||
iew.wakeDependents(inst);
|
||||
}
|
||||
|
||||
@@ -56,6 +56,7 @@
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/fetch.hh"
|
||||
#include "cpu/o3/free_list.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
@@ -100,13 +101,12 @@ class FullO3CPU : public BaseO3CPU
|
||||
{
|
||||
public:
|
||||
// Typedefs from the Impl here.
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
typedef O3ThreadState<Impl> ImplState;
|
||||
typedef O3ThreadState<Impl> Thread;
|
||||
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
|
||||
|
||||
friend class O3ThreadContext<Impl>;
|
||||
|
||||
@@ -184,7 +184,7 @@ class FullO3CPU : public BaseO3CPU
|
||||
~FullO3CPU();
|
||||
|
||||
ProbePointArg<PacketPtr> *ppInstAccessComplete;
|
||||
ProbePointArg<std::pair<DynInstPtr, PacketPtr> > *ppDataAccessComplete;
|
||||
ProbePointArg<std::pair<O3DynInstPtr, PacketPtr> > *ppDataAccessComplete;
|
||||
|
||||
/** Register probe points. */
|
||||
void regProbePoints() override;
|
||||
@@ -439,15 +439,15 @@ class FullO3CPU : public BaseO3CPU
|
||||
/** Function to add instruction onto the head of the list of the
|
||||
* instructions. Used when new instructions are fetched.
|
||||
*/
|
||||
ListIt addInst(const DynInstPtr &inst);
|
||||
ListIt addInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Function to tell the CPU that an instruction has completed. */
|
||||
void instDone(ThreadID tid, const DynInstPtr &inst);
|
||||
void instDone(ThreadID tid, const O3DynInstPtr &inst);
|
||||
|
||||
/** Remove an instruction from the front end of the list. There's
|
||||
* no restriction on location of the instruction.
|
||||
*/
|
||||
void removeFrontInst(const DynInstPtr &inst);
|
||||
void removeFrontInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Remove all instructions that are not currently in the ROB.
|
||||
* There's also an option to not squash delay slot instructions.*/
|
||||
@@ -472,7 +472,7 @@ class FullO3CPU : public BaseO3CPU
|
||||
#endif
|
||||
|
||||
/** List of all the instructions in flight. */
|
||||
std::list<DynInstPtr> instList;
|
||||
std::list<O3DynInstPtr> instList;
|
||||
|
||||
/** List of all the instructions that will be removed at the end of this
|
||||
* cycle.
|
||||
@@ -624,7 +624,7 @@ class FullO3CPU : public BaseO3CPU
|
||||
* instruction results at run time. This can be set to NULL if it
|
||||
* is not being used.
|
||||
*/
|
||||
Checker<Impl> *checker;
|
||||
Checker<O3DynInstPtr> *checker;
|
||||
|
||||
/** Pointer to the system. */
|
||||
System *system;
|
||||
@@ -648,7 +648,7 @@ class FullO3CPU : public BaseO3CPU
|
||||
std::vector<ThreadID> tids;
|
||||
|
||||
/** CPU pushRequest function, forwards request to LSQ. */
|
||||
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
unsigned int size, Addr addr, Request::Flags flags,
|
||||
uint64_t *res, AtomicOpFunctorPtr amo_op = nullptr,
|
||||
const std::vector<bool>& byte_enable =
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <queue>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
|
||||
@@ -62,7 +63,6 @@ class DefaultDecode
|
||||
private:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::FetchStruct FetchStruct;
|
||||
typedef typename Impl::DecodeStruct DecodeStruct;
|
||||
typedef typename Impl::TimeStruct TimeStruct;
|
||||
@@ -193,7 +193,7 @@ class DefaultDecode
|
||||
/** Squashes if there is a PC-relative branch that was predicted
|
||||
* incorrectly. Sends squash information back to fetch.
|
||||
*/
|
||||
void squash(const DynInstPtr &inst, ThreadID tid);
|
||||
void squash(const O3DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
public:
|
||||
/** Squashes due to commit signalling a squash. Changes status to
|
||||
@@ -235,10 +235,10 @@ class DefaultDecode
|
||||
typename TimeBuffer<FetchStruct>::wire fromFetch;
|
||||
|
||||
/** Queue of all instructions coming from fetch this cycle. */
|
||||
std::queue<DynInstPtr> insts[O3MaxThreads];
|
||||
std::queue<O3DynInstPtr> insts[O3MaxThreads];
|
||||
|
||||
/** Skid buffer between fetch and decode. */
|
||||
std::queue<DynInstPtr> skidBuffer[O3MaxThreads];
|
||||
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
|
||||
|
||||
/** Variable that tracks if decode has written to the time buffer this
|
||||
* cycle. Used to tell CPU if there is activity this cycle.
|
||||
@@ -285,7 +285,7 @@ class DefaultDecode
|
||||
Addr bdelayDoneSeqNum[O3MaxThreads];
|
||||
|
||||
/** Instruction used for squashing branch (used for MIPS)*/
|
||||
DynInstPtr squashInst[O3MaxThreads];
|
||||
O3DynInstPtr squashInst[O3MaxThreads];
|
||||
|
||||
/** Tells when there is a pending delay slot inst. to send
|
||||
* to rename. If there is, then wait squash after the next
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "debug/Activity.hh"
|
||||
#include "debug/Decode.hh"
|
||||
@@ -293,7 +294,7 @@ DefaultDecode<Impl>::unblock(ThreadID tid)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::squash(const DynInstPtr &inst, ThreadID tid)
|
||||
DefaultDecode<Impl>::squash(const O3DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
|
||||
"prediction detected at decode.\n", tid, inst->seqNum);
|
||||
@@ -395,7 +396,7 @@ template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::skidInsert(ThreadID tid)
|
||||
{
|
||||
DynInstPtr inst = NULL;
|
||||
O3DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
@@ -655,7 +656,7 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
|
||||
++stats.runCycles;
|
||||
}
|
||||
|
||||
std::queue<DynInstPtr>
|
||||
std::queue<O3DynInstPtr>
|
||||
&insts_to_decode = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid] : insts[tid];
|
||||
|
||||
@@ -664,7 +665,7 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
|
||||
while (insts_available > 0 && toRenameIndex < decodeWidth) {
|
||||
assert(!insts_to_decode.empty());
|
||||
|
||||
DynInstPtr inst = std::move(insts_to_decode.front());
|
||||
O3DynInstPtr inst = std::move(insts_to_decode.front());
|
||||
|
||||
insts_to_decode.pop();
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
#include "cpu/inst_res.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/cpu.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/lsq_unit.hh"
|
||||
#include "cpu/op_class.hh"
|
||||
@@ -67,10 +68,6 @@
|
||||
|
||||
class Packet;
|
||||
|
||||
class BaseO3DynInst;
|
||||
|
||||
using O3DynInstPtr = RefCountingPtr<BaseO3DynInst>;
|
||||
|
||||
class BaseO3DynInst : public ExecContext, public RefCounted
|
||||
{
|
||||
public:
|
||||
|
||||
52
src/cpu/o3/dyn_inst_ptr.hh
Normal file
52
src/cpu/o3/dyn_inst_ptr.hh
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2010, 2016 ARM Limited
|
||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_DYN_INST_PTR_HH__
|
||||
#define __CPU_O3_DYN_INST_PTR_HH__
|
||||
|
||||
#include "base/refcnt.hh"
|
||||
|
||||
class BaseO3DynInst;
|
||||
|
||||
using O3DynInstPtr = RefCountingPtr<BaseO3DynInst>;
|
||||
using O3DynInstConstPtr = RefCountingPtr<const BaseO3DynInst>;
|
||||
|
||||
#endif // __CPU_O3_DYN_INST_PTR_HH__
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "arch/decoder.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/pc_event.hh"
|
||||
#include "cpu/pred/bpred_unit.hh"
|
||||
@@ -72,8 +73,6 @@ class DefaultFetch
|
||||
{
|
||||
public:
|
||||
/** Typedefs from Impl. */
|
||||
typedef typename Impl::DynInst DynInst;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::FetchStruct FetchStruct;
|
||||
typedef typename Impl::TimeStruct TimeStruct;
|
||||
@@ -207,7 +206,7 @@ class DefaultFetch
|
||||
std::list<ThreadID> priorityList;
|
||||
|
||||
/** Probe points. */
|
||||
ProbePointArg<DynInstPtr> *ppFetch;
|
||||
ProbePointArg<O3DynInstPtr> *ppFetch;
|
||||
/** To probe when a fetch request is successfully sent. */
|
||||
ProbePointArg<RequestPtr> *ppFetchRequestSent;
|
||||
|
||||
@@ -294,7 +293,7 @@ class DefaultFetch
|
||||
* @param next_NPC Used for ISAs which use delay slots.
|
||||
* @return Whether or not a branch was predicted as taken.
|
||||
*/
|
||||
bool lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &pc);
|
||||
bool lookupAndUpdateNextPC(const O3DynInstPtr &inst, TheISA::PCState &pc);
|
||||
|
||||
/**
|
||||
* Fetches the cache line that contains the fetch PC. Returns any
|
||||
@@ -321,14 +320,14 @@ class DefaultFetch
|
||||
|
||||
/** Squashes a specific thread and resets the PC. */
|
||||
inline void doSquash(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst, ThreadID tid);
|
||||
const O3DynInstPtr squashInst, ThreadID tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
||||
* remove any instructions between fetch and decode
|
||||
* that should be squashed.
|
||||
*/
|
||||
void squashFromDecode(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst,
|
||||
const O3DynInstPtr squashInst,
|
||||
const InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
/** Checks if a thread is stalled. */
|
||||
@@ -344,7 +343,7 @@ class DefaultFetch
|
||||
* squash should be the commit stage.
|
||||
*/
|
||||
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
|
||||
DynInstPtr squashInst, ThreadID tid);
|
||||
O3DynInstPtr squashInst, ThreadID tid);
|
||||
|
||||
/** Ticks the fetch stage, processing all input signals and fetching
|
||||
* as many instructions as possible.
|
||||
@@ -375,9 +374,9 @@ class DefaultFetch
|
||||
RequestPort &getInstPort() { return icachePort; }
|
||||
|
||||
private:
|
||||
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
StaticInstPtr curMacroop, TheISA::PCState thisPC,
|
||||
TheISA::PCState nextPC, bool trace);
|
||||
O3DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
StaticInstPtr curMacroop, TheISA::PCState thisPC,
|
||||
TheISA::PCState nextPC, bool trace);
|
||||
|
||||
/** Returns the appropriate thread to fetch, given the fetch policy. */
|
||||
ThreadID getFetchingThread();
|
||||
@@ -505,7 +504,7 @@ class DefaultFetch
|
||||
unsigned fetchQueueSize;
|
||||
|
||||
/** Queue of fetched instructions. Per-thread to prevent HoL blocking. */
|
||||
std::deque<DynInstPtr> fetchQueue[O3MaxThreads];
|
||||
std::deque<O3DynInstPtr> fetchQueue[O3MaxThreads];
|
||||
|
||||
/** Whether or not the fetch buffer data is valid. */
|
||||
bool fetchBufferValid[O3MaxThreads];
|
||||
|
||||
@@ -150,7 +150,7 @@ template <class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::regProbePoints()
|
||||
{
|
||||
ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");
|
||||
ppFetch = new ProbePointArg<O3DynInstPtr>(cpu->getProbeManager(), "Fetch");
|
||||
ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
|
||||
"FetchRequest");
|
||||
|
||||
@@ -526,7 +526,7 @@ DefaultFetch<Impl>::deactivateThread(ThreadID tid)
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultFetch<Impl>::lookupAndUpdateNextPC(
|
||||
const DynInstPtr &inst, TheISA::PCState &nextPC)
|
||||
const O3DynInstPtr &inst, TheISA::PCState &nextPC)
|
||||
{
|
||||
// Do branch prediction check here.
|
||||
// A bit of a misnomer...next_PC is actually the current PC until
|
||||
@@ -706,7 +706,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
|
||||
// We will use a nop in order to carry the fault.
|
||||
DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
|
||||
O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
|
||||
fetchPC, fetchPC, false);
|
||||
instruction->setNotAnInst();
|
||||
|
||||
@@ -729,7 +729,7 @@ DefaultFetch<Impl>::finishTranslation(const Fault &fault,
|
||||
template <class Impl>
|
||||
inline void
|
||||
DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst, ThreadID tid)
|
||||
const O3DynInstPtr squashInst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
|
||||
tid, newPC);
|
||||
@@ -781,7 +781,7 @@ DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst,
|
||||
const O3DynInstPtr squashInst,
|
||||
const InstSeqNum seq_num, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
|
||||
@@ -851,7 +851,7 @@ DefaultFetch<Impl>::updateFetchStatus()
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
|
||||
const InstSeqNum seq_num, DynInstPtr squashInst,
|
||||
const InstSeqNum seq_num, O3DynInstPtr squashInst,
|
||||
ThreadID tid)
|
||||
{
|
||||
DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
|
||||
@@ -1070,7 +1070,7 @@ DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
StaticInstPtr curMacroop, TheISA::PCState thisPC,
|
||||
TheISA::PCState nextPC, bool trace)
|
||||
@@ -1079,8 +1079,8 @@ DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
InstSeqNum seq = cpu->getAndIncrementInstSeq();
|
||||
|
||||
// Create a new DynInst from the instruction fetched.
|
||||
DynInstPtr instruction =
|
||||
new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
|
||||
O3DynInstPtr instruction =
|
||||
new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
|
||||
instruction->setTid(tid);
|
||||
|
||||
instruction->setThreadState(cpu->thread[tid]);
|
||||
@@ -1297,7 +1297,7 @@ DefaultFetch<Impl>::fetch(bool &status_change)
|
||||
newMacro |= staticInst->isLastMicroop();
|
||||
}
|
||||
|
||||
DynInstPtr instruction =
|
||||
O3DynInstPtr instruction =
|
||||
buildInst(tid, staticInst, curMacroop,
|
||||
thisPC, nextPC, true);
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
@@ -81,7 +82,6 @@ class DefaultIEW
|
||||
{
|
||||
private:
|
||||
//Typedefs from Impl
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::TimeStruct TimeStruct;
|
||||
typedef typename Impl::IEWStruct IEWStruct;
|
||||
@@ -120,12 +120,12 @@ class DefaultIEW
|
||||
StageStatus wbStatus;
|
||||
|
||||
/** Probe points. */
|
||||
ProbePointArg<DynInstPtr> *ppMispredict;
|
||||
ProbePointArg<DynInstPtr> *ppDispatch;
|
||||
ProbePointArg<O3DynInstPtr> *ppMispredict;
|
||||
ProbePointArg<O3DynInstPtr> *ppDispatch;
|
||||
/** To probe when instruction execution begins. */
|
||||
ProbePointArg<DynInstPtr> *ppExecute;
|
||||
ProbePointArg<O3DynInstPtr> *ppExecute;
|
||||
/** To probe when instruction execution is complete. */
|
||||
ProbePointArg<DynInstPtr> *ppToCommit;
|
||||
ProbePointArg<O3DynInstPtr> *ppToCommit;
|
||||
|
||||
public:
|
||||
/** Constructs a DefaultIEW with the given parameters. */
|
||||
@@ -171,24 +171,24 @@ class DefaultIEW
|
||||
void squash(ThreadID tid);
|
||||
|
||||
/** Wakes all dependents of a completed instruction. */
|
||||
void wakeDependents(const DynInstPtr &inst);
|
||||
void wakeDependents(const O3DynInstPtr &inst);
|
||||
|
||||
/** Tells memory dependence unit that a memory instruction needs to be
|
||||
* rescheduled. It will re-execute once replayMemInst() is called.
|
||||
*/
|
||||
void rescheduleMemInst(const DynInstPtr &inst);
|
||||
void rescheduleMemInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Re-executes all rescheduled memory instructions. */
|
||||
void replayMemInst(const DynInstPtr &inst);
|
||||
void replayMemInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Moves memory instruction onto the list of cache blocked instructions */
|
||||
void blockMemInst(const DynInstPtr &inst);
|
||||
void blockMemInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Notifies that the cache has become unblocked */
|
||||
void cacheUnblocked();
|
||||
|
||||
/** Sends an instruction to commit through the time buffer. */
|
||||
void instToCommit(const DynInstPtr &inst);
|
||||
void instToCommit(const O3DynInstPtr &inst);
|
||||
|
||||
/** Inserts unused instructions of a thread into the skid buffer. */
|
||||
void skidInsert(ThreadID tid);
|
||||
@@ -226,7 +226,7 @@ class DefaultIEW
|
||||
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
|
||||
|
||||
/** Check misprediction */
|
||||
void checkMisprediction(const DynInstPtr &inst);
|
||||
void checkMisprediction(const O3DynInstPtr &inst);
|
||||
|
||||
// hardware transactional memory
|
||||
// For debugging purposes, it is useful to keep track of the most recent
|
||||
@@ -242,12 +242,12 @@ class DefaultIEW
|
||||
/** Sends commit proper information for a squash due to a branch
|
||||
* mispredict.
|
||||
*/
|
||||
void squashDueToBranch(const DynInstPtr &inst, ThreadID tid);
|
||||
void squashDueToBranch(const O3DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Sends commit proper information for a squash due to a memory order
|
||||
* violation.
|
||||
*/
|
||||
void squashDueToMemOrder(const DynInstPtr &inst, ThreadID tid);
|
||||
void squashDueToMemOrder(const O3DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Sets Dispatch to blocked, and signals back to other stages to block. */
|
||||
void block(ThreadID tid);
|
||||
@@ -301,7 +301,7 @@ class DefaultIEW
|
||||
|
||||
private:
|
||||
/** Updates execution stats based on the instruction. */
|
||||
void updateExeInstStats(const DynInstPtr &inst);
|
||||
void updateExeInstStats(const O3DynInstPtr &inst);
|
||||
|
||||
/** Pointer to main time buffer used for backwards communication. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
@@ -337,10 +337,10 @@ class DefaultIEW
|
||||
typename TimeBuffer<IEWStruct>::wire toCommit;
|
||||
|
||||
/** Queue of all instructions coming from rename this cycle. */
|
||||
std::queue<DynInstPtr> insts[O3MaxThreads];
|
||||
std::queue<O3DynInstPtr> insts[O3MaxThreads];
|
||||
|
||||
/** Skid buffer between rename and IEW. */
|
||||
std::queue<DynInstPtr> skidBuffer[O3MaxThreads];
|
||||
std::queue<O3DynInstPtr> skidBuffer[O3MaxThreads];
|
||||
|
||||
/** Scoreboard pointer. */
|
||||
Scoreboard* scoreboard;
|
||||
|
||||
@@ -50,6 +50,7 @@
|
||||
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/checker/cpu.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/fu_pool.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
@@ -122,20 +123,22 @@ template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::regProbePoints()
|
||||
{
|
||||
ppDispatch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Dispatch");
|
||||
ppMispredict = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Mispredict");
|
||||
ppDispatch = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Dispatch");
|
||||
ppMispredict = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Mispredict");
|
||||
/**
|
||||
* Probe point with dynamic instruction as the argument used to probe when
|
||||
* an instruction starts to execute.
|
||||
*/
|
||||
ppExecute = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(),
|
||||
"Execute");
|
||||
ppExecute = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Execute");
|
||||
/**
|
||||
* Probe point with dynamic instruction as the argument used to probe when
|
||||
* an instruction execution completes and it is marked ready to commit.
|
||||
*/
|
||||
ppToCommit = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(),
|
||||
"ToCommit");
|
||||
ppToCommit = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "ToCommit");
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -461,7 +464,7 @@ DefaultIEW<Impl>::squash(ThreadID tid)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::squashDueToBranch(const DynInstPtr& inst, ThreadID tid)
|
||||
DefaultIEW<Impl>::squashDueToBranch(const O3DynInstPtr& inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(IEW, "[tid:%i] [sn:%llu] Squashing from a specific instruction,"
|
||||
" PC: %s "
|
||||
@@ -487,7 +490,7 @@ DefaultIEW<Impl>::squashDueToBranch(const DynInstPtr& inst, ThreadID tid)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::squashDueToMemOrder(const DynInstPtr& inst, ThreadID tid)
|
||||
DefaultIEW<Impl>::squashDueToMemOrder(const O3DynInstPtr& inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(IEW, "[tid:%i] Memory violation, squashing violator and younger "
|
||||
"insts, PC: %s [sn:%llu].\n", tid, inst->pcState(), inst->seqNum);
|
||||
@@ -550,28 +553,28 @@ DefaultIEW<Impl>::unblock(ThreadID tid)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::wakeDependents(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::wakeDependents(const O3DynInstPtr& inst)
|
||||
{
|
||||
instQueue.wakeDependents(inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::rescheduleMemInst(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::rescheduleMemInst(const O3DynInstPtr& inst)
|
||||
{
|
||||
instQueue.rescheduleMemInst(inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::replayMemInst(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::replayMemInst(const O3DynInstPtr& inst)
|
||||
{
|
||||
instQueue.replayMemInst(inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::blockMemInst(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::blockMemInst(const O3DynInstPtr& inst)
|
||||
{
|
||||
instQueue.blockMemInst(inst);
|
||||
}
|
||||
@@ -585,7 +588,7 @@ DefaultIEW<Impl>::cacheUnblocked()
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::instToCommit(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::instToCommit(const O3DynInstPtr& inst)
|
||||
{
|
||||
// This function should not be called after writebackInsts in a
|
||||
// single cycle. That will cause problems with an instruction
|
||||
@@ -630,7 +633,7 @@ template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::skidInsert(ThreadID tid)
|
||||
{
|
||||
DynInstPtr inst = NULL;
|
||||
O3DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
@@ -927,13 +930,13 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
|
||||
{
|
||||
// Obtain instructions from skid buffer if unblocking, or queue from rename
|
||||
// otherwise.
|
||||
std::queue<DynInstPtr> &insts_to_dispatch =
|
||||
std::queue<O3DynInstPtr> &insts_to_dispatch =
|
||||
dispatchStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid] : insts[tid];
|
||||
|
||||
int insts_to_add = insts_to_dispatch.size();
|
||||
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
bool add_to_iq = false;
|
||||
int dis_num_inst = 0;
|
||||
|
||||
@@ -1208,7 +1211,7 @@ DefaultIEW<Impl>::executeInsts()
|
||||
|
||||
DPRINTF(IEW, "Execute: Executing instructions from IQ.\n");
|
||||
|
||||
DynInstPtr inst = instQueue.getInstToExecute();
|
||||
O3DynInstPtr inst = instQueue.getInstToExecute();
|
||||
|
||||
DPRINTF(IEW, "Execute: Processing PC %s, [tid:%i] [sn:%llu].\n",
|
||||
inst->pcState(), inst->threadNumber,inst->seqNum);
|
||||
@@ -1372,7 +1375,7 @@ DefaultIEW<Impl>::executeInsts()
|
||||
// If there was an ordering violation, then get the
|
||||
// DynInst that caused the violation. Note that this
|
||||
// clears the violation signal.
|
||||
DynInstPtr violator;
|
||||
O3DynInstPtr violator;
|
||||
violator = ldstQueue.getMemDepViolator(tid);
|
||||
|
||||
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
|
||||
@@ -1396,7 +1399,7 @@ DefaultIEW<Impl>::executeInsts()
|
||||
if (ldstQueue.violation(tid)) {
|
||||
assert(inst->isMemRef());
|
||||
|
||||
DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
|
||||
O3DynInstPtr violator = ldstQueue.getMemDepViolator(tid);
|
||||
|
||||
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
|
||||
"%s, inst PC: %s. Addr is: %#x.\n",
|
||||
@@ -1439,7 +1442,7 @@ DefaultIEW<Impl>::writebackInsts()
|
||||
// as part of backwards communication.
|
||||
for (int inst_num = 0; inst_num < wbWidth &&
|
||||
toCommit->insts[inst_num]; inst_num++) {
|
||||
DynInstPtr inst = toCommit->insts[inst_num];
|
||||
O3DynInstPtr inst = toCommit->insts[inst_num];
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %s.\n",
|
||||
@@ -1610,7 +1613,7 @@ DefaultIEW<Impl>::tick()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::updateExeInstStats(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::updateExeInstStats(const O3DynInstPtr& inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
@@ -1642,7 +1645,7 @@ DefaultIEW<Impl>::updateExeInstStats(const DynInstPtr& inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::checkMisprediction(const DynInstPtr& inst)
|
||||
DefaultIEW<Impl>::checkMisprediction(const O3DynInstPtr& inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
|
||||
@@ -32,8 +32,6 @@
|
||||
#include "cpu/o3/comm.hh"
|
||||
|
||||
// Forward declarations.
|
||||
class BaseO3DynInst;
|
||||
|
||||
template <class Impl>
|
||||
class FullO3CPU;
|
||||
|
||||
@@ -66,15 +64,6 @@ struct O3CPUImpl
|
||||
typedef TimeBufStruct<O3CPUImpl> TimeStruct;
|
||||
|
||||
|
||||
/** The DynInst type to be used. */
|
||||
typedef BaseO3DynInst DynInst;
|
||||
|
||||
/** The refcounted DynInst pointer to be used. In most cases this is
|
||||
* what should be used, and not DynInst *.
|
||||
*/
|
||||
typedef RefCountingPtr<DynInst> DynInstPtr;
|
||||
typedef RefCountingPtr<const DynInst> DynInstConstPtr;
|
||||
|
||||
/** The O3CPU type to be used. */
|
||||
typedef FullO3CPU<O3CPUImpl> O3CPU;
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "base/types.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dep_graph.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/mem_dep_unit.hh"
|
||||
#include "cpu/o3/store_set.hh"
|
||||
@@ -89,19 +90,18 @@ class InstructionQueue
|
||||
public:
|
||||
//Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::IssueStruct IssueStruct;
|
||||
typedef typename Impl::TimeStruct TimeStruct;
|
||||
|
||||
// Typedef of iterator through the list of instructions.
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
|
||||
|
||||
/** FU completion event class. */
|
||||
class FUCompletion : public Event
|
||||
{
|
||||
private:
|
||||
/** Executing instruction. */
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
|
||||
/** Index of the FU used for executing. */
|
||||
int fuIdx;
|
||||
@@ -116,7 +116,7 @@ class InstructionQueue
|
||||
|
||||
public:
|
||||
/** Construct a FU completion event. */
|
||||
FUCompletion(const DynInstPtr &_inst, int fu_idx,
|
||||
FUCompletion(const O3DynInstPtr &_inst, int fu_idx,
|
||||
InstructionQueue<Impl> *iq_ptr);
|
||||
|
||||
virtual void process();
|
||||
@@ -177,40 +177,43 @@ class InstructionQueue
|
||||
bool hasReadyInsts();
|
||||
|
||||
/** Inserts a new instruction into the IQ. */
|
||||
void insert(const DynInstPtr &new_inst);
|
||||
void insert(const O3DynInstPtr &new_inst);
|
||||
|
||||
/** Inserts a new, non-speculative instruction into the IQ. */
|
||||
void insertNonSpec(const DynInstPtr &new_inst);
|
||||
void insertNonSpec(const O3DynInstPtr &new_inst);
|
||||
|
||||
/** Inserts a memory or write barrier into the IQ to make sure
|
||||
* loads and stores are ordered properly.
|
||||
*/
|
||||
void insertBarrier(const DynInstPtr &barr_inst);
|
||||
void insertBarrier(const O3DynInstPtr &barr_inst);
|
||||
|
||||
/** Returns the oldest scheduled instruction, and removes it from
|
||||
* the list of instructions waiting to execute.
|
||||
*/
|
||||
DynInstPtr getInstToExecute();
|
||||
O3DynInstPtr getInstToExecute();
|
||||
|
||||
/** Gets a memory instruction that was deferred due to a delayed DTB
|
||||
* translation if it is now ready to execute. NULL if none available.
|
||||
*/
|
||||
DynInstPtr getDeferredMemInstToExecute();
|
||||
O3DynInstPtr getDeferredMemInstToExecute();
|
||||
|
||||
/** Gets a memory instruction that was blocked on the cache. NULL if none
|
||||
* available.
|
||||
*/
|
||||
DynInstPtr getBlockedMemInstToExecute();
|
||||
O3DynInstPtr getBlockedMemInstToExecute();
|
||||
|
||||
/**
|
||||
* Records the instruction as the producer of a register without
|
||||
* adding it to the rest of the IQ.
|
||||
*/
|
||||
void recordProducer(const DynInstPtr &inst)
|
||||
{ addToProducers(inst); }
|
||||
void
|
||||
recordProducer(const O3DynInstPtr &inst)
|
||||
{
|
||||
addToProducers(inst);
|
||||
}
|
||||
|
||||
/** Process FU completion event. */
|
||||
void processFUCompletion(const DynInstPtr &inst, int fu_idx);
|
||||
void processFUCompletion(const O3DynInstPtr &inst, int fu_idx);
|
||||
|
||||
/**
|
||||
* Schedules ready instructions, adding the ready ones (oldest first) to
|
||||
@@ -228,34 +231,35 @@ class InstructionQueue
|
||||
void commit(const InstSeqNum &inst, ThreadID tid = 0);
|
||||
|
||||
/** Wakes all dependents of a completed instruction. */
|
||||
int wakeDependents(const DynInstPtr &completed_inst);
|
||||
int wakeDependents(const O3DynInstPtr &completed_inst);
|
||||
|
||||
/** Adds a ready memory instruction to the ready list. */
|
||||
void addReadyMemInst(const DynInstPtr &ready_inst);
|
||||
void addReadyMemInst(const O3DynInstPtr &ready_inst);
|
||||
|
||||
/**
|
||||
* Reschedules a memory instruction. It will be ready to issue once
|
||||
* replayMemInst() is called.
|
||||
*/
|
||||
void rescheduleMemInst(const DynInstPtr &resched_inst);
|
||||
void rescheduleMemInst(const O3DynInstPtr &resched_inst);
|
||||
|
||||
/** Replays a memory instruction. It must be rescheduled first. */
|
||||
void replayMemInst(const DynInstPtr &replay_inst);
|
||||
void replayMemInst(const O3DynInstPtr &replay_inst);
|
||||
|
||||
/**
|
||||
* Defers a memory instruction when its DTB translation incurs a hw
|
||||
* page table walk.
|
||||
*/
|
||||
void deferMemInst(const DynInstPtr &deferred_inst);
|
||||
void deferMemInst(const O3DynInstPtr &deferred_inst);
|
||||
|
||||
/** Defers a memory instruction when it is cache blocked. */
|
||||
void blockMemInst(const DynInstPtr &blocked_inst);
|
||||
void blockMemInst(const O3DynInstPtr &blocked_inst);
|
||||
|
||||
/** Notify instruction queue that a previous blockage has resolved */
|
||||
void cacheUnblocked();
|
||||
|
||||
/** Indicates an ordering violation between a store and a load. */
|
||||
void violation(const DynInstPtr &store, const DynInstPtr &faulting_load);
|
||||
void violation(const O3DynInstPtr &store,
|
||||
const O3DynInstPtr &faulting_load);
|
||||
|
||||
/**
|
||||
* Squashes instructions for a thread. Squashing information is obtained
|
||||
@@ -310,23 +314,23 @@ class InstructionQueue
|
||||
//////////////////////////////////////
|
||||
|
||||
/** List of all the instructions in the IQ (some of which may be issued). */
|
||||
std::list<DynInstPtr> instList[O3MaxThreads];
|
||||
std::list<O3DynInstPtr> instList[O3MaxThreads];
|
||||
|
||||
/** List of instructions that are ready to be executed. */
|
||||
std::list<DynInstPtr> instsToExecute;
|
||||
std::list<O3DynInstPtr> instsToExecute;
|
||||
|
||||
/** List of instructions waiting for their DTB translation to
|
||||
* complete (hw page table walk in progress).
|
||||
*/
|
||||
std::list<DynInstPtr> deferredMemInsts;
|
||||
std::list<O3DynInstPtr> deferredMemInsts;
|
||||
|
||||
/** List of instructions that have been cache blocked. */
|
||||
std::list<DynInstPtr> blockedMemInsts;
|
||||
std::list<O3DynInstPtr> blockedMemInsts;
|
||||
|
||||
/** List of instructions that were cache blocked, but a retry has been seen
|
||||
* since, so they can now be retried. May fail again and go back on the blocked list.
|
||||
*/
|
||||
std::list<DynInstPtr> retryMemInsts;
|
||||
std::list<O3DynInstPtr> retryMemInsts;
|
||||
|
||||
/**
|
||||
* Struct for comparing entries to be added to the priority queue.
|
||||
@@ -335,16 +339,14 @@ class InstructionQueue
|
||||
* numbers (and hence are older) will be at the top of the
|
||||
* priority queue.
|
||||
*/
|
||||
struct pqCompare
|
||||
struct PqCompare
|
||||
{
|
||||
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
||||
{
|
||||
return lhs->seqNum > rhs->seqNum;
|
||||
}
|
||||
bool operator()(const O3DynInstPtr &lhs,
|
||||
const O3DynInstPtr &rhs) const;
|
||||
};
|
||||
|
||||
typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
|
||||
ReadyInstQueue;
|
||||
typedef std::priority_queue<
|
||||
O3DynInstPtr, std::vector<O3DynInstPtr>, PqCompare> ReadyInstQueue;
|
||||
|
||||
/** List of ready instructions, per op class. They are separated by op
|
||||
* class to allow for easy mapping to FUs.
|
||||
@@ -358,9 +360,9 @@ class InstructionQueue
|
||||
* the sequence number will be available. Thus it is most efficient to be
|
||||
* able to search by the sequence number alone.
|
||||
*/
|
||||
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
|
||||
std::map<InstSeqNum, O3DynInstPtr> nonSpecInsts;
|
||||
|
||||
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
|
||||
typedef typename std::map<InstSeqNum, O3DynInstPtr>::iterator NonSpecMapIt;
|
||||
|
||||
/** Entry for the list age ordering by op class. */
|
||||
struct ListOrderEntry
|
||||
@@ -397,7 +399,7 @@ class InstructionQueue
|
||||
*/
|
||||
void moveToYoungerInst(ListOrderIt age_order_it);
|
||||
|
||||
DependencyGraph<DynInstPtr> dependGraph;
|
||||
DependencyGraph<O3DynInstPtr> dependGraph;
|
||||
|
||||
//////////////////////////////////////
|
||||
// Various parameters
|
||||
@@ -450,13 +452,13 @@ class InstructionQueue
|
||||
std::vector<bool> regScoreboard;
|
||||
|
||||
/** Adds an instruction to the dependency graph, as a consumer. */
|
||||
bool addToDependents(const DynInstPtr &new_inst);
|
||||
bool addToDependents(const O3DynInstPtr &new_inst);
|
||||
|
||||
/** Adds an instruction to the dependency graph, as a producer. */
|
||||
void addToProducers(const DynInstPtr &new_inst);
|
||||
void addToProducers(const O3DynInstPtr &new_inst);
|
||||
|
||||
/** Moves an instruction to the ready queue if it is ready. */
|
||||
void addIfReady(const DynInstPtr &inst);
|
||||
void addIfReady(const O3DynInstPtr &inst);
|
||||
|
||||
/** Debugging function to count how many entries are in the IQ. It does
|
||||
* a linear walk through the instructions, so do not call this function
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "base/logging.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/fu_pool.hh"
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
@@ -59,7 +60,7 @@
|
||||
using std::list;
|
||||
|
||||
template <class Impl>
|
||||
InstructionQueue<Impl>::FUCompletion::FUCompletion(const DynInstPtr &_inst,
|
||||
InstructionQueue<Impl>::FUCompletion::FUCompletion(const O3DynInstPtr &_inst,
|
||||
int fu_idx, InstructionQueue<Impl> *iq_ptr)
|
||||
: Event(Stat_Event_Pri, AutoDelete),
|
||||
inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false)
|
||||
@@ -576,7 +577,7 @@ InstructionQueue<Impl>::hasReadyInsts()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::insert(const DynInstPtr &new_inst)
|
||||
InstructionQueue<Impl>::insert(const O3DynInstPtr &new_inst)
|
||||
{
|
||||
if (new_inst->isFloating()) {
|
||||
iqIOStats.fpInstQueueWrites++;
|
||||
@@ -622,7 +623,7 @@ InstructionQueue<Impl>::insert(const DynInstPtr &new_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::insertNonSpec(const DynInstPtr &new_inst)
|
||||
InstructionQueue<Impl>::insertNonSpec(const O3DynInstPtr &new_inst)
|
||||
{
|
||||
// @todo: Clean up this code; can do it by setting inst as unable
|
||||
// to issue, then calling normal insert on the inst.
|
||||
@@ -669,7 +670,7 @@ InstructionQueue<Impl>::insertNonSpec(const DynInstPtr &new_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
InstructionQueue<Impl>::insertBarrier(const O3DynInstPtr &barr_inst)
|
||||
{
|
||||
memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
|
||||
|
||||
@@ -677,11 +678,11 @@ InstructionQueue<Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
InstructionQueue<Impl>::getInstToExecute()
|
||||
{
|
||||
assert(!instsToExecute.empty());
|
||||
DynInstPtr inst = std::move(instsToExecute.front());
|
||||
O3DynInstPtr inst = std::move(instsToExecute.front());
|
||||
instsToExecute.pop_front();
|
||||
if (inst->isFloating()) {
|
||||
iqIOStats.fpInstQueueReads++;
|
||||
@@ -748,7 +749,8 @@ InstructionQueue<Impl>::moveToYoungerInst(ListOrderIt list_order_it)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::processFUCompletion(const DynInstPtr &inst, int fu_idx)
|
||||
InstructionQueue<Impl>::processFUCompletion(
|
||||
const O3DynInstPtr &inst, int fu_idx)
|
||||
{
|
||||
DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum);
|
||||
assert(!cpu->switchedOut());
|
||||
@@ -779,7 +781,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||
|
||||
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
|
||||
|
||||
DynInstPtr mem_inst;
|
||||
O3DynInstPtr mem_inst;
|
||||
while ((mem_inst = std::move(getDeferredMemInstToExecute()))) {
|
||||
addReadyMemInst(mem_inst);
|
||||
}
|
||||
@@ -806,7 +808,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||
|
||||
assert(!readyInsts[op_class].empty());
|
||||
|
||||
DynInstPtr issuing_inst = readyInsts[op_class].top();
|
||||
O3DynInstPtr issuing_inst = readyInsts[op_class].top();
|
||||
|
||||
if (issuing_inst->isFloating()) {
|
||||
iqIOStats.fpInstQueueReads++;
|
||||
@@ -986,7 +988,7 @@ InstructionQueue<Impl>::commit(const InstSeqNum &inst, ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
|
||||
InstructionQueue<Impl>::wakeDependents(const O3DynInstPtr &completed_inst)
|
||||
{
|
||||
int dependents = 0;
|
||||
|
||||
@@ -1054,7 +1056,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
|
||||
|
||||
//Go through the dependency chain, marking the registers as
|
||||
//ready within the waiting instructions.
|
||||
DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
|
||||
O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex());
|
||||
|
||||
while (dep_inst) {
|
||||
DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] "
|
||||
@@ -1086,7 +1088,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::addReadyMemInst(const DynInstPtr &ready_inst)
|
||||
InstructionQueue<Impl>::addReadyMemInst(const O3DynInstPtr &ready_inst)
|
||||
{
|
||||
OpClass op_class = ready_inst->opClass();
|
||||
|
||||
@@ -1109,7 +1111,7 @@ InstructionQueue<Impl>::addReadyMemInst(const DynInstPtr &ready_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::rescheduleMemInst(const DynInstPtr &resched_inst)
|
||||
InstructionQueue<Impl>::rescheduleMemInst(const O3DynInstPtr &resched_inst)
|
||||
{
|
||||
DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum);
|
||||
|
||||
@@ -1123,21 +1125,21 @@ InstructionQueue<Impl>::rescheduleMemInst(const DynInstPtr &resched_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::replayMemInst(const DynInstPtr &replay_inst)
|
||||
InstructionQueue<Impl>::replayMemInst(const O3DynInstPtr &replay_inst)
|
||||
{
|
||||
memDepUnit[replay_inst->threadNumber].replay();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::deferMemInst(const DynInstPtr &deferred_inst)
|
||||
InstructionQueue<Impl>::deferMemInst(const O3DynInstPtr &deferred_inst)
|
||||
{
|
||||
deferredMemInsts.push_back(deferred_inst);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::blockMemInst(const DynInstPtr &blocked_inst)
|
||||
InstructionQueue<Impl>::blockMemInst(const O3DynInstPtr &blocked_inst)
|
||||
{
|
||||
blocked_inst->clearIssued();
|
||||
blocked_inst->clearCanIssue();
|
||||
@@ -1154,13 +1156,13 @@ InstructionQueue<Impl>::cacheUnblocked()
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
InstructionQueue<Impl>::getDeferredMemInstToExecute()
|
||||
{
|
||||
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
|
||||
++it) {
|
||||
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
|
||||
DynInstPtr mem_inst = std::move(*it);
|
||||
O3DynInstPtr mem_inst = std::move(*it);
|
||||
deferredMemInsts.erase(it);
|
||||
return mem_inst;
|
||||
}
|
||||
@@ -1169,13 +1171,13 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
InstructionQueue<Impl>::getBlockedMemInstToExecute()
|
||||
{
|
||||
if (retryMemInsts.empty()) {
|
||||
return nullptr;
|
||||
} else {
|
||||
DynInstPtr mem_inst = std::move(retryMemInsts.front());
|
||||
O3DynInstPtr mem_inst = std::move(retryMemInsts.front());
|
||||
retryMemInsts.pop_front();
|
||||
return mem_inst;
|
||||
}
|
||||
@@ -1183,8 +1185,8 @@ InstructionQueue<Impl>::getBlockedMemInstToExecute()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::violation(const DynInstPtr &store,
|
||||
const DynInstPtr &faulting_load)
|
||||
InstructionQueue<Impl>::violation(const O3DynInstPtr &store,
|
||||
const O3DynInstPtr &faulting_load)
|
||||
{
|
||||
iqIOStats.intInstQueueWrites++;
|
||||
memDepUnit[store->threadNumber].violation(store, faulting_load);
|
||||
@@ -1223,7 +1225,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
while (squash_it != instList[tid].end() &&
|
||||
(*squash_it)->seqNum > squashedSeqNum[tid]) {
|
||||
|
||||
DynInstPtr squashed_inst = (*squash_it);
|
||||
O3DynInstPtr squashed_inst = (*squash_it);
|
||||
if (squashed_inst->isFloating()) {
|
||||
iqIOStats.fpInstQueueWrites++;
|
||||
} else if (squashed_inst->isVector()) {
|
||||
@@ -1329,7 +1331,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
// IQ clears out the heads of the dependency graph only when
|
||||
// instructions reach writeback stage. If an instruction is squashed
|
||||
// before writeback stage, its head of dependency graph would not be
|
||||
// cleared out; it holds the instruction's DynInstPtr. This prevents
|
||||
// cleared out; it holds the instruction's O3DynInstPtr. This prevents
|
||||
// freeing the squashed instruction's DynInst.
|
||||
// Thus, we need to manually clear out the squashed instructions' heads
|
||||
// of dependency graph.
|
||||
@@ -1352,7 +1354,15 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
|
||||
InstructionQueue<Impl>::PqCompare::operator()(
|
||||
const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const
|
||||
{
|
||||
return lhs->seqNum > rhs->seqNum;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
InstructionQueue<Impl>::addToDependents(const O3DynInstPtr &new_inst)
|
||||
{
|
||||
// Loop through the instruction's source registers, adding
|
||||
// them to the dependency list if they are not ready.
|
||||
@@ -1400,7 +1410,7 @@ InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
|
||||
InstructionQueue<Impl>::addToProducers(const O3DynInstPtr &new_inst)
|
||||
{
|
||||
// Nothing really needs to be marked when an instruction becomes
|
||||
// the producer of a register's value, but for convenience a ptr
|
||||
@@ -1436,7 +1446,7 @@ InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
InstructionQueue<Impl>::addIfReady(const DynInstPtr &inst)
|
||||
InstructionQueue<Impl>::addIfReady(const O3DynInstPtr &inst)
|
||||
{
|
||||
// If the instruction now has all of its source registers
|
||||
// available, then add it to the list of ready instructions.
|
||||
|
||||
@@ -53,6 +53,8 @@
|
||||
#include "base/flags.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "cpu/utils.hh"
|
||||
#include "enums/SMTQueuePolicy.hh"
|
||||
#include "mem/port.hh"
|
||||
@@ -74,7 +76,6 @@ class LSQ
|
||||
{
|
||||
public:
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
class LSQRequest;
|
||||
/** Derived class to hold any sender state the LSQ needs. */
|
||||
@@ -93,7 +94,7 @@ class LSQ
|
||||
public:
|
||||
|
||||
/** Instruction which initiated the access to memory. */
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
/** The main packet from a split load, used during writeback. */
|
||||
PacketPtr mainPkt;
|
||||
/** A second packet from a split store that needs sending. */
|
||||
@@ -113,7 +114,7 @@ class LSQ
|
||||
* case the SenderState knows.
|
||||
*/
|
||||
bool deleted;
|
||||
ContextID contextId() { return inst->contextId(); }
|
||||
ContextID contextId();
|
||||
|
||||
/** Completes a packet and returns whether the access is finished. */
|
||||
inline bool isComplete() { return outstanding == 0; }
|
||||
@@ -293,7 +294,7 @@ class LSQ
|
||||
|
||||
public:
|
||||
LSQUnit<Impl>& _port;
|
||||
const DynInstPtr _inst;
|
||||
const O3DynInstPtr _inst;
|
||||
uint32_t _taskId;
|
||||
PacketDataPtr _data;
|
||||
std::vector<PacketPtr> _packets;
|
||||
@@ -308,38 +309,11 @@ class LSQ
|
||||
AtomicOpFunctorPtr _amo_op;
|
||||
protected:
|
||||
LSQUnit<Impl>* lsqUnit() { return &_port; }
|
||||
LSQRequest(LSQUnit<Impl> *port, const DynInstPtr& inst, bool isLoad) :
|
||||
_state(State::NotIssued), _senderState(nullptr),
|
||||
_port(*port), _inst(inst), _data(nullptr),
|
||||
_res(nullptr), _addr(0), _size(0), _flags(0),
|
||||
_numOutstandingPackets(0), _amo_op(nullptr)
|
||||
{
|
||||
flags.set(Flag::IsLoad, isLoad);
|
||||
flags.set(Flag::WbStore,
|
||||
_inst->isStoreConditional() || _inst->isAtomic());
|
||||
flags.set(Flag::IsAtomic, _inst->isAtomic());
|
||||
install();
|
||||
}
|
||||
LSQRequest(LSQUnit<Impl>* port, const DynInstPtr& inst, bool isLoad,
|
||||
const Addr& addr, const uint32_t& size,
|
||||
const Request::Flags& flags_,
|
||||
PacketDataPtr data = nullptr, uint64_t* res = nullptr,
|
||||
AtomicOpFunctorPtr amo_op = nullptr)
|
||||
: _state(State::NotIssued), _senderState(nullptr),
|
||||
numTranslatedFragments(0),
|
||||
numInTranslationFragments(0),
|
||||
_port(*port), _inst(inst), _data(data),
|
||||
_res(res), _addr(addr), _size(size),
|
||||
_flags(flags_),
|
||||
_numOutstandingPackets(0),
|
||||
_amo_op(std::move(amo_op))
|
||||
{
|
||||
flags.set(Flag::IsLoad, isLoad);
|
||||
flags.set(Flag::WbStore,
|
||||
_inst->isStoreConditional() || _inst->isAtomic());
|
||||
flags.set(Flag::IsAtomic, _inst->isAtomic());
|
||||
install();
|
||||
}
|
||||
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad);
|
||||
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
|
||||
const Addr& addr, const uint32_t& size,
|
||||
const Request::Flags& flags_, PacketDataPtr data=nullptr,
|
||||
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr);
|
||||
|
||||
bool
|
||||
isLoad() const
|
||||
@@ -354,21 +328,9 @@ class LSQ
|
||||
}
|
||||
|
||||
/** Install the request in the LQ/SQ. */
|
||||
void install()
|
||||
{
|
||||
if (isLoad()) {
|
||||
_port.loadQueue[_inst->lqIdx].setRequest(this);
|
||||
} else {
|
||||
// Store, StoreConditional, and Atomic requests are pushed
|
||||
// to this storeQueue
|
||||
_port.storeQueue[_inst->sqIdx].setRequest(this);
|
||||
}
|
||||
}
|
||||
virtual bool
|
||||
squashed() const override
|
||||
{
|
||||
return _inst->isSquashed();
|
||||
}
|
||||
void install();
|
||||
|
||||
bool squashed() const override;
|
||||
|
||||
/**
|
||||
* Test if the LSQRequest has been released, i.e. self-owned.
|
||||
@@ -391,7 +353,8 @@ class LSQ
|
||||
* but there is any in-flight translation request to the TLB or access
|
||||
* request to the memory.
|
||||
*/
|
||||
void release(Flag reason)
|
||||
void
|
||||
release(Flag reason)
|
||||
{
|
||||
assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
|
||||
if (!isAnyOutstandingRequest()) {
|
||||
@@ -410,35 +373,14 @@ class LSQ
|
||||
* The request is only added if the mask is empty or if there is at
|
||||
* least an active element in it.
|
||||
*/
|
||||
void
|
||||
addRequest(Addr addr, unsigned size,
|
||||
const std::vector<bool>& byte_enable)
|
||||
{
|
||||
if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
|
||||
auto request = std::make_shared<Request>(
|
||||
addr, size, _flags, _inst->requestorId(),
|
||||
_inst->instAddr(), _inst->contextId(),
|
||||
std::move(_amo_op));
|
||||
request->setByteEnable(byte_enable);
|
||||
_requests.push_back(request);
|
||||
}
|
||||
}
|
||||
void addRequest(Addr addr, unsigned size,
|
||||
const std::vector<bool>& byte_enable);
|
||||
|
||||
/** Destructor.
|
||||
* The LSQRequest owns the request. If the packet has already been
|
||||
* sent, the sender state will be deleted upon receiving the reply.
|
||||
*/
|
||||
virtual ~LSQRequest()
|
||||
{
|
||||
assert(!isAnyOutstandingRequest());
|
||||
_inst->savedReq = nullptr;
|
||||
if (_senderState)
|
||||
delete _senderState;
|
||||
|
||||
for (auto r: _packets)
|
||||
delete r;
|
||||
};
|
||||
|
||||
virtual ~LSQRequest();
|
||||
|
||||
public:
|
||||
/** Convenience getters/setters. */
|
||||
@@ -450,7 +392,7 @@ class LSQ
|
||||
request()->setContext(context_id);
|
||||
}
|
||||
|
||||
const DynInstPtr&
|
||||
const O3DynInstPtr&
|
||||
instruction()
|
||||
{
|
||||
return _inst;
|
||||
@@ -728,7 +670,7 @@ class LSQ
|
||||
using LSQRequest::_numOutstandingPackets;
|
||||
using LSQRequest::_amo_op;
|
||||
public:
|
||||
SingleDataRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
|
||||
SingleDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
|
||||
bool isLoad, const Addr& addr, const uint32_t& size,
|
||||
const Request::Flags& flags_, PacketDataPtr data=nullptr,
|
||||
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr) :
|
||||
@@ -766,7 +708,7 @@ class LSQ
|
||||
using LSQRequest::flags;
|
||||
using LSQRequest::setState;
|
||||
public:
|
||||
HtmCmdRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
|
||||
HtmCmdRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
|
||||
const Request::Flags& flags_);
|
||||
inline virtual ~HtmCmdRequest() {}
|
||||
virtual void initiateTranslation();
|
||||
@@ -813,7 +755,7 @@ class LSQ
|
||||
PacketPtr _mainPacket;
|
||||
|
||||
public:
|
||||
SplitDataRequest(LSQUnit<Impl>* port, const DynInstPtr& inst,
|
||||
SplitDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
|
||||
bool isLoad, const Addr& addr, const uint32_t& size,
|
||||
const Request::Flags & flags_, PacketDataPtr data=nullptr,
|
||||
uint64_t* res=nullptr) :
|
||||
@@ -876,15 +818,15 @@ class LSQ
|
||||
void tick();
|
||||
|
||||
/** Inserts a load into the LSQ. */
|
||||
void insertLoad(const DynInstPtr &load_inst);
|
||||
void insertLoad(const O3DynInstPtr &load_inst);
|
||||
/** Inserts a store into the LSQ. */
|
||||
void insertStore(const DynInstPtr &store_inst);
|
||||
void insertStore(const O3DynInstPtr &store_inst);
|
||||
|
||||
/** Executes a load. */
|
||||
Fault executeLoad(const DynInstPtr &inst);
|
||||
Fault executeLoad(const O3DynInstPtr &inst);
|
||||
|
||||
/** Executes a store. */
|
||||
Fault executeStore(const DynInstPtr &inst);
|
||||
Fault executeStore(const O3DynInstPtr &inst);
|
||||
|
||||
/**
|
||||
* Commits loads up until the given sequence number for a specific thread.
|
||||
@@ -924,7 +866,7 @@ class LSQ
|
||||
bool violation(ThreadID tid) { return thread.at(tid).violation(); }
|
||||
|
||||
/** Gets the instruction that caused the memory ordering violation. */
|
||||
DynInstPtr
|
||||
O3DynInstPtr
|
||||
getMemDepViolator(ThreadID tid)
|
||||
{
|
||||
return thread.at(tid).getMemDepViolator();
|
||||
@@ -1103,7 +1045,7 @@ class LSQ
|
||||
|
||||
void recvTimingSnoopReq(PacketPtr pkt);
|
||||
|
||||
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
unsigned int size, Addr addr, Request::Flags flags,
|
||||
uint64_t *res, AtomicOpFunctorPtr amo_op,
|
||||
const std::vector<bool>& byte_enable);
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
#include "base/compiler.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "cpu/o3/cpu.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
@@ -59,6 +60,13 @@
|
||||
#include "debug/Writeback.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
|
||||
template <class Impl>
|
||||
ContextID
|
||||
LSQ<Impl>::LSQSenderState::contextId()
|
||||
{
|
||||
return inst->contextId();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, DefaultIEW<Impl> *iew_ptr,
|
||||
const DerivO3CPUParams &params)
|
||||
@@ -220,7 +228,7 @@ LSQ<Impl>::cachePortBusy(bool is_load)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
|
||||
LSQ<Impl>::insertLoad(const O3DynInstPtr &load_inst)
|
||||
{
|
||||
ThreadID tid = load_inst->threadNumber;
|
||||
|
||||
@@ -229,7 +237,7 @@ LSQ<Impl>::insertLoad(const DynInstPtr &load_inst)
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
|
||||
LSQ<Impl>::insertStore(const O3DynInstPtr &store_inst)
|
||||
{
|
||||
ThreadID tid = store_inst->threadNumber;
|
||||
|
||||
@@ -238,7 +246,7 @@ LSQ<Impl>::insertStore(const DynInstPtr &store_inst)
|
||||
|
||||
template<class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::executeLoad(const DynInstPtr &inst)
|
||||
LSQ<Impl>::executeLoad(const O3DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
@@ -247,7 +255,7 @@ LSQ<Impl>::executeLoad(const DynInstPtr &inst)
|
||||
|
||||
template<class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::executeStore(const DynInstPtr &inst)
|
||||
LSQ<Impl>::executeStore(const O3DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
@@ -676,7 +684,7 @@ LSQ<Impl>::dumpInsts() const
|
||||
|
||||
template<class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
LSQ<Impl>::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
unsigned int size, Addr addr, Request::Flags flags,
|
||||
uint64_t *res, AtomicOpFunctorPtr amo_op,
|
||||
const std::vector<bool>& byte_enable)
|
||||
@@ -951,6 +959,85 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
LSQ<Impl>::LSQRequest::LSQRequest(
|
||||
LSQUnit<Impl> *port, const O3DynInstPtr& inst, bool isLoad) :
|
||||
_state(State::NotIssued), _senderState(nullptr),
|
||||
_port(*port), _inst(inst), _data(nullptr),
|
||||
_res(nullptr), _addr(0), _size(0), _flags(0),
|
||||
_numOutstandingPackets(0), _amo_op(nullptr)
|
||||
{
|
||||
flags.set(Flag::IsLoad, isLoad);
|
||||
flags.set(Flag::WbStore,
|
||||
_inst->isStoreConditional() || _inst->isAtomic());
|
||||
flags.set(Flag::IsAtomic, _inst->isAtomic());
|
||||
install();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
LSQ<Impl>::LSQRequest::LSQRequest(
|
||||
LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
|
||||
const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
|
||||
PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op)
|
||||
: _state(State::NotIssued), _senderState(nullptr),
|
||||
numTranslatedFragments(0),
|
||||
numInTranslationFragments(0),
|
||||
_port(*port), _inst(inst), _data(data),
|
||||
_res(res), _addr(addr), _size(size),
|
||||
_flags(flags_),
|
||||
_numOutstandingPackets(0),
|
||||
_amo_op(std::move(amo_op))
|
||||
{
|
||||
flags.set(Flag::IsLoad, isLoad);
|
||||
flags.set(Flag::WbStore,
|
||||
_inst->isStoreConditional() || _inst->isAtomic());
|
||||
flags.set(Flag::IsAtomic, _inst->isAtomic());
|
||||
install();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::LSQRequest::install()
|
||||
{
|
||||
if (isLoad()) {
|
||||
_port.loadQueue[_inst->lqIdx].setRequest(this);
|
||||
} else {
|
||||
// Store, StoreConditional, and Atomic requests are pushed
|
||||
// to this storeQueue
|
||||
_port.storeQueue[_inst->sqIdx].setRequest(this);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool LSQ<Impl>::LSQRequest::squashed() const { return _inst->isSquashed(); }
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::LSQRequest::addRequest(Addr addr, unsigned size,
|
||||
const std::vector<bool>& byte_enable)
|
||||
{
|
||||
if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) {
|
||||
auto request = std::make_shared<Request>(
|
||||
addr, size, _flags, _inst->requestorId(),
|
||||
_inst->instAddr(), _inst->contextId(),
|
||||
std::move(_amo_op));
|
||||
request->setByteEnable(byte_enable);
|
||||
_requests.push_back(request);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
LSQ<Impl>::LSQRequest::~LSQRequest()
|
||||
{
|
||||
assert(!isAnyOutstandingRequest());
|
||||
_inst->savedReq = nullptr;
|
||||
if (_senderState)
|
||||
delete _senderState;
|
||||
|
||||
for (auto r: _packets)
|
||||
delete r;
|
||||
};
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
|
||||
@@ -1226,7 +1313,7 @@ LSQ<Impl>::DcachePort::recvReqRetry()
|
||||
|
||||
template<class Impl>
|
||||
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit<Impl>* port,
|
||||
const DynInstPtr& inst,
|
||||
const O3DynInstPtr& inst,
|
||||
const Request::Flags& flags_) :
|
||||
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
|
||||
nullptr, nullptr, nullptr)
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "arch/locked_mem.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
@@ -85,7 +86,6 @@ class LSQUnit
|
||||
static constexpr auto MaxDataBytes = MaxVecRegLenInBytes;
|
||||
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::IssueStruct IssueStruct;
|
||||
|
||||
using LSQSenderState = typename LSQ<Impl>::LSQSenderState;
|
||||
@@ -95,23 +95,17 @@ class LSQUnit
|
||||
{
|
||||
private:
|
||||
/** The instruction. */
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
/** The request. */
|
||||
LSQRequest* req;
|
||||
LSQRequest* req = nullptr;
|
||||
/** The size of the operation. */
|
||||
uint32_t _size;
|
||||
uint32_t _size = 0;
|
||||
/** Valid entry. */
|
||||
bool _valid;
|
||||
public:
|
||||
/** Constructs an empty store queue entry. */
|
||||
LSQEntry()
|
||||
: inst(nullptr), req(nullptr), _size(0), _valid(false)
|
||||
{
|
||||
}
|
||||
bool _valid = false;
|
||||
|
||||
public:
|
||||
~LSQEntry()
|
||||
{
|
||||
inst = nullptr;
|
||||
if (req != nullptr) {
|
||||
req->freeLSQEntry();
|
||||
req = nullptr;
|
||||
@@ -131,13 +125,14 @@ class LSQUnit
|
||||
}
|
||||
|
||||
void
|
||||
set(const DynInstPtr& inst)
|
||||
set(const O3DynInstPtr& inst)
|
||||
{
|
||||
assert(!_valid);
|
||||
this->inst = inst;
|
||||
_valid = true;
|
||||
_size = 0;
|
||||
}
|
||||
|
||||
LSQRequest* request() { return req; }
|
||||
void setRequest(LSQRequest* r) { req = r; }
|
||||
bool hasRequest() { return req != nullptr; }
|
||||
@@ -146,7 +141,7 @@ class LSQUnit
|
||||
bool valid() const { return _valid; }
|
||||
uint32_t& size() { return _size; }
|
||||
const uint32_t& size() const { return _size; }
|
||||
const DynInstPtr& instruction() const { return inst; }
|
||||
const O3DynInstPtr& instruction() const { return inst; }
|
||||
/** @} */
|
||||
};
|
||||
|
||||
@@ -156,32 +151,27 @@ class LSQUnit
|
||||
/** The store data. */
|
||||
char _data[MaxDataBytes];
|
||||
/** Whether or not the store can writeback. */
|
||||
bool _canWB;
|
||||
bool _canWB = false;
|
||||
/** Whether or not the store is committed. */
|
||||
bool _committed;
|
||||
bool _committed = false;
|
||||
/** Whether or not the store is completed. */
|
||||
bool _completed;
|
||||
bool _completed = false;
|
||||
/** Does this request write all zeros and thus doesn't
|
||||
* have any data attached to it. Used for cache block zero
|
||||
* style instructions (ARM DC ZVA; ALPHA WH64)
|
||||
*/
|
||||
bool _isAllZeros;
|
||||
bool _isAllZeros = false;
|
||||
|
||||
public:
|
||||
static constexpr size_t DataSize = sizeof(_data);
|
||||
/** Constructs an empty store queue entry. */
|
||||
SQEntry()
|
||||
: _canWB(false), _committed(false), _completed(false),
|
||||
_isAllZeros(false)
|
||||
{
|
||||
std::memset(_data, 0, DataSize);
|
||||
}
|
||||
|
||||
~SQEntry()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
set(const DynInstPtr& inst)
|
||||
set(const O3DynInstPtr& inst)
|
||||
{
|
||||
LSQEntry::set(inst);
|
||||
}
|
||||
@@ -192,6 +182,7 @@ class LSQUnit
|
||||
LSQEntry::clear();
|
||||
_canWB = _completed = _committed = _isAllZeros = false;
|
||||
}
|
||||
|
||||
/** Member accessors. */
|
||||
/** @{ */
|
||||
bool& canWB() { return _canWB; }
|
||||
@@ -250,11 +241,11 @@ class LSQUnit
|
||||
void takeOverFrom();
|
||||
|
||||
/** Inserts an instruction. */
|
||||
void insert(const DynInstPtr &inst);
|
||||
void insert(const O3DynInstPtr &inst);
|
||||
/** Inserts a load instruction. */
|
||||
void insertLoad(const DynInstPtr &load_inst);
|
||||
void insertLoad(const O3DynInstPtr &load_inst);
|
||||
/** Inserts a store instruction. */
|
||||
void insertStore(const DynInstPtr &store_inst);
|
||||
void insertStore(const O3DynInstPtr &store_inst);
|
||||
|
||||
/** Check for ordering violations in the LSQ. For a store squash if we
|
||||
* ever find a conflicting load. For a load, only squash if we
|
||||
@@ -263,7 +254,7 @@ class LSQUnit
|
||||
* @param inst the instruction to check
|
||||
*/
|
||||
Fault checkViolations(typename LoadQueue::iterator& loadIt,
|
||||
const DynInstPtr& inst);
|
||||
const O3DynInstPtr& inst);
|
||||
|
||||
/** Check if an incoming invalidate hits in the lsq on a load
|
||||
* that might have issued out of order wrt another load because
|
||||
@@ -272,11 +263,11 @@ class LSQUnit
|
||||
void checkSnoop(PacketPtr pkt);
|
||||
|
||||
/** Executes a load instruction. */
|
||||
Fault executeLoad(const DynInstPtr &inst);
|
||||
Fault executeLoad(const O3DynInstPtr &inst);
|
||||
|
||||
Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
|
||||
/** Executes a store instruction. */
|
||||
Fault executeStore(const DynInstPtr &inst);
|
||||
Fault executeStore(const O3DynInstPtr &inst);
|
||||
|
||||
/** Commits the head load. */
|
||||
void commitLoad();
|
||||
@@ -302,7 +293,7 @@ class LSQUnit
|
||||
bool violation() { return memDepViolator; }
|
||||
|
||||
/** Returns the memory ordering violator. */
|
||||
DynInstPtr getMemDepViolator();
|
||||
O3DynInstPtr getMemDepViolator();
|
||||
|
||||
/** Returns the number of free LQ entries. */
|
||||
unsigned numFreeLoadEntries();
|
||||
@@ -378,7 +369,7 @@ class LSQUnit
|
||||
void resetState();
|
||||
|
||||
/** Writes back the instruction, sending it to IEW. */
|
||||
void writeback(const DynInstPtr &inst, PacketPtr pkt);
|
||||
void writeback(const O3DynInstPtr &inst, PacketPtr pkt);
|
||||
|
||||
/** Try to finish a previously blocked write back attempt */
|
||||
void writebackBlockedStore();
|
||||
@@ -460,7 +451,7 @@ class LSQUnit
|
||||
{
|
||||
public:
|
||||
/** Constructs a writeback event. */
|
||||
WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt,
|
||||
WritebackEvent(const O3DynInstPtr &_inst, PacketPtr pkt,
|
||||
LSQUnit *lsq_ptr);
|
||||
|
||||
/** Processes the writeback event. */
|
||||
@@ -471,7 +462,7 @@ class LSQUnit
|
||||
|
||||
private:
|
||||
/** Instruction whose results are being written back. */
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
|
||||
/** The packet that would have been sent to memory. */
|
||||
PacketPtr pkt;
|
||||
@@ -552,7 +543,7 @@ class LSQUnit
|
||||
bool storeInFlight;
|
||||
|
||||
/** The oldest load that caused a memory ordering violation. */
|
||||
DynInstPtr memDepViolator;
|
||||
O3DynInstPtr memDepViolator;
|
||||
|
||||
/** Whether or not there is a packet that couldn't be sent because of
|
||||
* a lack of cache ports. */
|
||||
@@ -634,357 +625,4 @@ class LSQUnit
|
||||
typedef CircularQueue<SQEntry> SQueue;
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
{
|
||||
LQEntry& load_req = loadQueue[load_idx];
|
||||
const DynInstPtr& load_inst = load_req.instruction();
|
||||
|
||||
load_req.setRequest(req);
|
||||
assert(load_inst);
|
||||
|
||||
assert(!load_inst->isExecuted());
|
||||
|
||||
// Make sure this isn't a strictly ordered load
|
||||
// A bit of a hackish way to get strictly ordered accesses to work
|
||||
// only if they're at the head of the LSQ and are ready to commit
|
||||
// (at the head of the ROB too).
|
||||
|
||||
if (req->mainRequest()->isStrictlyOrdered() &&
|
||||
(load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
|
||||
// Tell IQ/mem dep unit that this instruction will need to be
|
||||
// rescheduled eventually
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
load_inst->clearIssued();
|
||||
load_inst->effAddrValid(false);
|
||||
++stats.rescheduledLoads;
|
||||
DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
// Must delete request now that it wasn't handed off to
|
||||
// memory. This is quite ugly. @todo: Figure out the proper
|
||||
// place to really handle request deletes.
|
||||
load_req.setRequest(nullptr);
|
||||
req->discard();
|
||||
return std::make_shared<GenericISA::M5PanicFault>(
|
||||
"Strictly ordered load [sn:%llx] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
}
|
||||
|
||||
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
|
||||
"storeHead: %i addr: %#x%s\n",
|
||||
load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
|
||||
req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
|
||||
|
||||
if (req->mainRequest()->isLLSC()) {
|
||||
// Disable recording the result temporarily. Writing to misc
|
||||
// regs normally updates the result, but this is not the
|
||||
// desired behavior when handling store conditionals.
|
||||
load_inst->recordResult(false);
|
||||
TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
|
||||
load_inst->recordResult(true);
|
||||
}
|
||||
|
||||
if (req->mainRequest()->isLocalAccess()) {
|
||||
assert(!load_inst->memData);
|
||||
assert(!load_inst->inHtmTransactionalState());
|
||||
load_inst->memData = new uint8_t[MaxDataBytes];
|
||||
|
||||
ThreadContext *thread = cpu->tcBase(lsqID);
|
||||
PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
|
||||
|
||||
main_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
|
||||
cpu->schedule(wb, cpu->clockEdge(delay));
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
|
||||
{
|
||||
// don't want to send nested transactionStarts and
|
||||
// transactionStops outside of core, e.g. to Ruby
|
||||
if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
|
||||
Cycles delay(0);
|
||||
PacketPtr data_pkt =
|
||||
new Packet(req->mainRequest(), MemCmd::ReadReq);
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData =
|
||||
new uint8_t[req->mainRequest()->getSize()];
|
||||
// sanity checks expect zero in request's data
|
||||
memset(load_inst->memData, 0, req->mainRequest()->getSize());
|
||||
}
|
||||
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
data_pkt->makeResponse();
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
|
||||
cpu->schedule(wb, cpu->clockEdge(delay));
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the SQ for any previous stores that might lead to forwarding
|
||||
auto store_it = load_inst->sqIt;
|
||||
assert (store_it >= storeWBIt);
|
||||
// End once we've reached the top of the LSQ
|
||||
while (store_it != storeWBIt) {
|
||||
// Move the index to one younger
|
||||
store_it--;
|
||||
assert(store_it->valid());
|
||||
assert(store_it->instruction()->seqNum < load_inst->seqNum);
|
||||
int store_size = store_it->size();
|
||||
|
||||
// Cache maintenance instructions go down via the store
|
||||
// path but they carry no data and they shouldn't be
|
||||
// considered for forwarding
|
||||
if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
|
||||
!(store_it->request()->mainRequest() &&
|
||||
store_it->request()->mainRequest()->isCacheMaintenance())) {
|
||||
assert(store_it->instruction()->effAddrValid());
|
||||
|
||||
// Check if the store data is within the lower and upper bounds of
|
||||
// addresses that the request needs.
|
||||
auto req_s = req->mainRequest()->getVaddr();
|
||||
auto req_e = req_s + req->mainRequest()->getSize();
|
||||
auto st_s = store_it->instruction()->effAddr;
|
||||
auto st_e = st_s + store_size;
|
||||
|
||||
bool store_has_lower_limit = req_s >= st_s;
|
||||
bool store_has_upper_limit = req_e <= st_e;
|
||||
bool lower_load_has_store_part = req_s < st_e;
|
||||
bool upper_load_has_store_part = req_e > st_s;
|
||||
|
||||
auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
|
||||
|
||||
// If the store entry is not atomic (atomic does not have valid
|
||||
// data), the store has all of the data needed, and
|
||||
// the load is not LLSC, then
|
||||
// we can forward data from the store to the load
|
||||
if (!store_it->instruction()->isAtomic() &&
|
||||
store_has_lower_limit && store_has_upper_limit &&
|
||||
!req->mainRequest()->isLLSC()) {
|
||||
|
||||
const auto& store_req = store_it->request()->mainRequest();
|
||||
coverage = store_req->isMasked() ?
|
||||
AddrRangeCoverage::PartialAddrRangeCoverage :
|
||||
AddrRangeCoverage::FullAddrRangeCoverage;
|
||||
} else if (
|
||||
// This is the partial store-load forwarding case where a store
|
||||
// has only part of the load's data and the load isn't LLSC
|
||||
(!req->mainRequest()->isLLSC() &&
|
||||
((store_has_lower_limit && lower_load_has_store_part) ||
|
||||
(store_has_upper_limit && upper_load_has_store_part) ||
|
||||
(lower_load_has_store_part && upper_load_has_store_part))) ||
|
||||
// The load is LLSC, and the store has all or part of the
|
||||
// load's data
|
||||
(req->mainRequest()->isLLSC() &&
|
||||
((store_has_lower_limit || upper_load_has_store_part) &&
|
||||
(store_has_upper_limit || lower_load_has_store_part))) ||
|
||||
// The store entry is atomic and has all or part of the load's
|
||||
// data
|
||||
(store_it->instruction()->isAtomic() &&
|
||||
((store_has_lower_limit || upper_load_has_store_part) &&
|
||||
(store_has_upper_limit || lower_load_has_store_part)))) {
|
||||
|
||||
coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
|
||||
}
|
||||
|
||||
if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
|
||||
// Get shift amount for offset into the store's data.
|
||||
int shift_amt = req->mainRequest()->getVaddr() -
|
||||
store_it->instruction()->effAddr;
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData =
|
||||
new uint8_t[req->mainRequest()->getSize()];
|
||||
}
|
||||
if (store_it->isAllZeros())
|
||||
memset(load_inst->memData, 0,
|
||||
req->mainRequest()->getSize());
|
||||
else
|
||||
memcpy(load_inst->memData,
|
||||
store_it->data() + shift_amt,
|
||||
req->mainRequest()->getSize());
|
||||
|
||||
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
|
||||
"addr %#x\n", store_it._idx,
|
||||
req->mainRequest()->getVaddr());
|
||||
|
||||
PacketPtr data_pkt = new Packet(req->mainRequest(),
|
||||
MemCmd::ReadReq);
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
// hardware transactional memory
|
||||
// Store to load forwarding within a transaction
|
||||
// This should be okay because the store will be sent to
|
||||
// the memory subsystem and subsequently get added to the
|
||||
// write set of the transaction. The write set has a stronger
|
||||
// property than the read set, so the load doesn't necessarily
|
||||
// have to be there.
|
||||
assert(!req->mainRequest()->isHTMCmd());
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
assert (!storeQueue[store_it._idx].completed());
|
||||
assert (
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
inHtmTransactionalState());
|
||||
assert (
|
||||
load_inst->getHtmTransactionUid() ==
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
getHtmTransactionUid());
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
|
||||
"pc=0x%lx - vaddr=0x%lx - "
|
||||
"paddr=0x%lx - htmUid=%u\n",
|
||||
load_inst->instAddr(),
|
||||
data_pkt->req->hasVaddr() ?
|
||||
data_pkt->req->getVaddr() : 0lu,
|
||||
data_pkt->getAddr(),
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
|
||||
if (req->isAnyOutstandingRequest()) {
|
||||
assert(req->_numOutstandingPackets > 0);
|
||||
// There are memory requests packets in flight already.
|
||||
// This may happen if the store was not complete the
|
||||
// first time this load got executed. Signal the senderSate
|
||||
// that response packets should be discarded.
|
||||
req->discardSenderState();
|
||||
}
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
|
||||
this);
|
||||
|
||||
// We'll say this has a 1 cycle load-store forwarding latency
|
||||
// for now.
|
||||
// @todo: Need to make this a parameter.
|
||||
cpu->schedule(wb, curTick());
|
||||
|
||||
// Don't need to do anything special for split loads.
|
||||
++stats.forwLoads;
|
||||
|
||||
return NoFault;
|
||||
} else if (coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
|
||||
// If it's already been written back, then don't worry about
|
||||
// stalling on it.
|
||||
if (store_it->completed()) {
|
||||
panic("Should not check one of these");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Must stall load and force it to retry, so long as it's the
|
||||
// oldest load that needs to do so.
|
||||
if (!stalled ||
|
||||
(stalled &&
|
||||
load_inst->seqNum <
|
||||
loadQueue[stallingLoadIdx].instruction()->seqNum)) {
|
||||
stalled = true;
|
||||
stallingStoreIsn = store_it->instruction()->seqNum;
|
||||
stallingLoadIdx = load_idx;
|
||||
}
|
||||
|
||||
// Tell IQ/mem dep unit that this instruction will need to be
|
||||
// rescheduled eventually
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
load_inst->clearIssued();
|
||||
load_inst->effAddrValid(false);
|
||||
++stats.rescheduledLoads;
|
||||
|
||||
// Do not generate a writeback event as this instruction is not
|
||||
// complete.
|
||||
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
|
||||
"Store idx %i to load addr %#x\n",
|
||||
store_it._idx, req->mainRequest()->getVaddr());
|
||||
|
||||
// Must discard the request.
|
||||
req->discard();
|
||||
load_req.setRequest(nullptr);
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there's no forwarding case, then go access memory
|
||||
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
|
||||
}
|
||||
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMCmd()) {
|
||||
// this is a simple sanity check
|
||||
// the Ruby cache controller will set
|
||||
// memData to 0x0ul if successful.
|
||||
*load_inst->memData = (uint64_t) 0x1ull;
|
||||
}
|
||||
|
||||
// For now, load throughput is constrained by the number of
|
||||
// load FUs only, and loads do not consume a cache port (only
|
||||
// stores do).
|
||||
// @todo We should account for cache port contention
|
||||
// and arbitrate between loads and stores.
|
||||
|
||||
// if we the cache is not blocked, do cache access
|
||||
if (req->senderState() == nullptr) {
|
||||
LQSenderState *state = new LQSenderState(
|
||||
loadQueue.getIterator(load_idx));
|
||||
state->isLoad = true;
|
||||
state->inst = load_inst;
|
||||
state->isSplit = req->isSplit();
|
||||
req->senderState(state);
|
||||
}
|
||||
req->buildPackets();
|
||||
req->sendPacketToCache();
|
||||
if (!req->isSent())
|
||||
iewStage->blockMemInst(load_inst);
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
|
||||
{
|
||||
assert(storeQueue[store_idx].valid());
|
||||
|
||||
DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
|
||||
"[sn:%llu]\n",
|
||||
store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
|
||||
storeQueue[store_idx].instruction()->seqNum);
|
||||
|
||||
storeQueue[store_idx].setRequest(req);
|
||||
unsigned size = req->_size;
|
||||
storeQueue[store_idx].size() = size;
|
||||
bool store_no_data =
|
||||
req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
|
||||
storeQueue[store_idx].isAllZeros() = store_no_data;
|
||||
assert(size <= SQEntry::DataSize || store_no_data);
|
||||
|
||||
// copy data into the storeQueue only if the store request has valid data
|
||||
if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
|
||||
!req->request()->isCacheMaintenance() &&
|
||||
!req->request()->isAtomic())
|
||||
memcpy(storeQueue[store_idx].data(), data, size);
|
||||
|
||||
// This function only writes the data to the store queue, so no fault
|
||||
// can happen here.
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
#endif // __CPU_O3_LSQ_UNIT_HH__
|
||||
|
||||
@@ -60,7 +60,7 @@
|
||||
#include "mem/request.hh"
|
||||
|
||||
template<class Impl>
|
||||
LSQUnit<Impl>::WritebackEvent::WritebackEvent(const DynInstPtr &_inst,
|
||||
LSQUnit<Impl>::WritebackEvent::WritebackEvent(const O3DynInstPtr &_inst,
|
||||
PacketPtr _pkt, LSQUnit *lsq_ptr)
|
||||
: Event(Default_Pri, AutoDelete),
|
||||
inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr)
|
||||
@@ -112,7 +112,7 @@ void
|
||||
LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||
{
|
||||
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
|
||||
DynInstPtr inst = state->inst;
|
||||
O3DynInstPtr inst = state->inst;
|
||||
|
||||
// hardware transactional memory
|
||||
// sanity check
|
||||
@@ -317,7 +317,7 @@ LSQUnit<Impl>::takeOverFrom()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQUnit<Impl>::insert(const DynInstPtr &inst)
|
||||
LSQUnit<Impl>::insert(const O3DynInstPtr &inst)
|
||||
{
|
||||
assert(inst->isMemRef());
|
||||
|
||||
@@ -334,7 +334,7 @@ LSQUnit<Impl>::insert(const DynInstPtr &inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
|
||||
LSQUnit<Impl>::insertLoad(const O3DynInstPtr &load_inst)
|
||||
{
|
||||
assert(!loadQueue.full());
|
||||
assert(loads < loadQueue.capacity());
|
||||
@@ -397,7 +397,7 @@ LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
|
||||
LSQUnit<Impl>::insertStore(const O3DynInstPtr& store_inst)
|
||||
{
|
||||
// Make sure it is not full before inserting an instruction.
|
||||
assert(!storeQueue.full());
|
||||
@@ -418,10 +418,10 @@ LSQUnit<Impl>::insertStore(const DynInstPtr& store_inst)
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
LSQUnit<Impl>::getMemDepViolator()
|
||||
{
|
||||
DynInstPtr temp = memDepViolator;
|
||||
O3DynInstPtr temp = memDepViolator;
|
||||
|
||||
memDepViolator = NULL;
|
||||
|
||||
@@ -475,7 +475,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
|
||||
|
||||
Addr invalidate_addr = pkt->getAddr() & cacheBlockMask;
|
||||
|
||||
DynInstPtr ld_inst = iter->instruction();
|
||||
O3DynInstPtr ld_inst = iter->instruction();
|
||||
assert(ld_inst);
|
||||
LSQRequest *req = iter->request();
|
||||
|
||||
@@ -535,7 +535,7 @@ LSQUnit<Impl>::checkSnoop(PacketPtr pkt)
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
|
||||
const DynInstPtr& inst)
|
||||
const O3DynInstPtr& inst)
|
||||
{
|
||||
Addr inst_eff_addr1 = inst->effAddr >> depCheckShift;
|
||||
Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift;
|
||||
@@ -546,7 +546,7 @@ LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
|
||||
* like the implementation that came before it, we're overly conservative.
|
||||
*/
|
||||
while (loadIt != loadQueue.end()) {
|
||||
DynInstPtr ld_inst = loadIt->instruction();
|
||||
O3DynInstPtr ld_inst = loadIt->instruction();
|
||||
if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) {
|
||||
++loadIt;
|
||||
continue;
|
||||
@@ -615,7 +615,7 @@ LSQUnit<Impl>::checkViolations(typename LoadQueue::iterator& loadIt,
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
|
||||
LSQUnit<Impl>::executeLoad(const O3DynInstPtr &inst)
|
||||
{
|
||||
// Execute a specific load.
|
||||
Fault load_fault = NoFault;
|
||||
@@ -682,7 +682,7 @@ LSQUnit<Impl>::executeLoad(const DynInstPtr &inst)
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::executeStore(const DynInstPtr &store_inst)
|
||||
LSQUnit<Impl>::executeStore(const O3DynInstPtr &store_inst)
|
||||
{
|
||||
// Make sure that a store exists.
|
||||
assert(stores != 0);
|
||||
@@ -837,7 +837,7 @@ LSQUnit<Impl>::writebackStores()
|
||||
assert(storeWBIt->hasRequest());
|
||||
assert(!storeWBIt->committed());
|
||||
|
||||
DynInstPtr inst = storeWBIt->instruction();
|
||||
O3DynInstPtr inst = storeWBIt->instruction();
|
||||
LSQRequest* req = storeWBIt->request();
|
||||
|
||||
// Process store conditionals or store release after all previous
|
||||
@@ -1095,7 +1095,7 @@ LSQUnit<Impl>::storePostSend()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
|
||||
LSQUnit<Impl>::writeback(const O3DynInstPtr &inst, PacketPtr pkt)
|
||||
{
|
||||
iewStage->wakeCPU();
|
||||
|
||||
@@ -1170,7 +1170,7 @@ LSQUnit<Impl>::completeStore(typename StoreQueue::iterator store_idx)
|
||||
|
||||
/* We 'need' a copy here because we may clear the entry from the
|
||||
* store queue. */
|
||||
DynInstPtr store_inst = store_idx->instruction();
|
||||
O3DynInstPtr store_inst = store_idx->instruction();
|
||||
if (store_idx == storeQueue.begin()) {
|
||||
do {
|
||||
storeQueue.front().clear();
|
||||
@@ -1279,7 +1279,7 @@ LSQUnit<Impl>::dumpInsts() const
|
||||
cprintf("Load queue: ");
|
||||
|
||||
for (const auto& e: loadQueue) {
|
||||
const DynInstPtr &inst(e.instruction());
|
||||
const O3DynInstPtr &inst(e.instruction());
|
||||
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
|
||||
}
|
||||
cprintf("\n");
|
||||
@@ -1288,7 +1288,7 @@ LSQUnit<Impl>::dumpInsts() const
|
||||
cprintf("Store queue: ");
|
||||
|
||||
for (const auto& e: storeQueue) {
|
||||
const DynInstPtr &inst(e.instruction());
|
||||
const O3DynInstPtr &inst(e.instruction());
|
||||
cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum);
|
||||
}
|
||||
|
||||
@@ -1302,4 +1302,358 @@ LSQUnit<Impl>::cacheLineSize()
|
||||
return cpu->cacheLineSize();
|
||||
}
|
||||
|
||||
/**
 * Execute the memory read for the load held in load queue entry
 * @p load_idx, using @p req as its LSQ request.
 *
 * In order, the function:
 *  - binds @p req to the load queue entry;
 *  - for a strictly ordered request that is not at the head of the load
 *    queue or not at commit, reschedules the load, discards the request
 *    and returns an M5PanicFault;
 *  - for LLSC requests, calls TheISA::handleLockedRead() with result
 *    recording temporarily disabled;
 *  - for local accesses, runs the request's local accessor and schedules
 *    a WritebackEvent after the returned delay;
 *  - for HTM start/commit requests flagged NO_ACCESS, builds a zeroed
 *    response packet locally and schedules a WritebackEvent;
 *  - walks the store queue from the load's sqIt back to storeWBIt looking
 *    for a store overlapping the load's address range: on full coverage
 *    the data is copied from the store entry and a WritebackEvent is
 *    scheduled; on partial coverage the load is rescheduled and the
 *    request discarded;
 *  - otherwise allocates the load's data buffer if needed, attaches a
 *    sender state, builds the packets and sends them to the cache,
 *    blocking the instruction in IEW if the request was not sent.
 *
 * @param req The LSQ request describing the load.
 * @param load_idx Index of the load queue entry being executed.
 * @return NoFault on every path except the strictly ordered reschedule
 *         case, which returns an M5PanicFault.
 */
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
{
|
||||
LQEntry& load_req = loadQueue[load_idx];
|
||||
const O3DynInstPtr& load_inst = load_req.instruction();
|
||||
|
||||
load_req.setRequest(req);
|
||||
assert(load_inst);
|
||||
|
||||
assert(!load_inst->isExecuted());
|
||||
|
||||
// Make sure this isn't a strictly ordered load
|
||||
// A bit of a hackish way to get strictly ordered accesses to work
|
||||
// only if they're at the head of the LSQ and are ready to commit
|
||||
// (at the head of the ROB too).
|
||||
|
||||
if (req->mainRequest()->isStrictlyOrdered() &&
|
||||
(load_idx != loadQueue.head() || !load_inst->isAtCommit())) {
|
||||
// Tell IQ/mem dep unit that this instruction will need to be
|
||||
// rescheduled eventually
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
load_inst->clearIssued();
|
||||
load_inst->effAddrValid(false);
|
||||
++stats.rescheduledLoads;
|
||||
DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
// Must delete request now that it wasn't handed off to
|
||||
// memory. This is quite ugly. @todo: Figure out the proper
|
||||
// place to really handle request deletes.
|
||||
load_req.setRequest(nullptr);
|
||||
req->discard();
|
||||
return std::make_shared<GenericISA::M5PanicFault>(
|
||||
"Strictly ordered load [sn:%llx] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
}
|
||||
|
||||
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
|
||||
"storeHead: %i addr: %#x%s\n",
|
||||
load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1,
|
||||
req->mainRequest()->getPaddr(), req->isSplit() ? " split" : "");
|
||||
|
||||
if (req->mainRequest()->isLLSC()) {
|
||||
// Disable recording the result temporarily. Writing to misc
|
||||
// regs normally updates the result, but this is not the
|
||||
// desired behavior when handling store conditionals.
|
||||
load_inst->recordResult(false);
|
||||
TheISA::handleLockedRead(load_inst.get(), req->mainRequest());
|
||||
load_inst->recordResult(true);
|
||||
}
|
||||
|
||||
// Local accesses are serviced through the request's local accessor
// instead of being sent to the cache.
if (req->mainRequest()->isLocalAccess()) {
|
||||
assert(!load_inst->memData);
|
||||
assert(!load_inst->inHtmTransactionalState());
|
||||
load_inst->memData = new uint8_t[MaxDataBytes];
|
||||
|
||||
ThreadContext *thread = cpu->tcBase(lsqID);
|
||||
PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq);
|
||||
|
||||
main_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt);
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this);
|
||||
cpu->schedule(wb, cpu->clockEdge(delay));
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
|
||||
{
|
||||
// don't want to send nested transactionStarts and
|
||||
// transactionStops outside of core, e.g. to Ruby
|
||||
if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
|
||||
Cycles delay(0);
|
||||
PacketPtr data_pkt =
|
||||
new Packet(req->mainRequest(), MemCmd::ReadReq);
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData =
|
||||
new uint8_t[req->mainRequest()->getSize()];
|
||||
// Sanity checks expect zero in the request's data.
|
||||
memset(load_inst->memData, 0, req->mainRequest()->getSize());
|
||||
}
|
||||
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
data_pkt->makeResponse();
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
|
||||
cpu->schedule(wb, cpu->clockEdge(delay));
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the SQ for any previous stores that might lead to forwarding
|
||||
auto store_it = load_inst->sqIt;
|
||||
assert (store_it >= storeWBIt);
|
||||
// Stop once the iterator has walked back to storeWBIt.
|
||||
while (store_it != storeWBIt) {
|
||||
// Step back one store queue entry; the asserts below check that it
// is valid and older (lower seqNum) than this load.
|
||||
store_it--;
|
||||
assert(store_it->valid());
|
||||
assert(store_it->instruction()->seqNum < load_inst->seqNum);
|
||||
int store_size = store_it->size();
|
||||
|
||||
// Cache maintenance instructions go down via the store
|
||||
// path but they carry no data and they shouldn't be
|
||||
// considered for forwarding
|
||||
if (store_size != 0 && !store_it->instruction()->strictlyOrdered() &&
|
||||
!(store_it->request()->mainRequest() &&
|
||||
store_it->request()->mainRequest()->isCacheMaintenance())) {
|
||||
assert(store_it->instruction()->effAddrValid());
|
||||
|
||||
// Check if the store data is within the lower and upper bounds of
|
||||
// addresses that the request needs.
// [req_s, req_e) is the load's range, [st_s, st_e) the store's.
|
||||
auto req_s = req->mainRequest()->getVaddr();
|
||||
auto req_e = req_s + req->mainRequest()->getSize();
|
||||
auto st_s = store_it->instruction()->effAddr;
|
||||
auto st_e = st_s + store_size;
|
||||
|
||||
bool store_has_lower_limit = req_s >= st_s;
|
||||
bool store_has_upper_limit = req_e <= st_e;
|
||||
bool lower_load_has_store_part = req_s < st_e;
|
||||
bool upper_load_has_store_part = req_e > st_s;
|
||||
|
||||
auto coverage = AddrRangeCoverage::NoAddrRangeCoverage;
|
||||
|
||||
// If the store entry is not atomic (atomic does not have valid
|
||||
// data), the store has all of the data needed, and
|
||||
// the load is not LLSC, then
|
||||
// we can forward data from the store to the load
|
||||
if (!store_it->instruction()->isAtomic() &&
|
||||
store_has_lower_limit && store_has_upper_limit &&
|
||||
!req->mainRequest()->isLLSC()) {
|
||||
|
||||
// A masked store is only treated as partial coverage, even
// though its address range contains the whole load.
const auto& store_req = store_it->request()->mainRequest();
|
||||
coverage = store_req->isMasked() ?
|
||||
AddrRangeCoverage::PartialAddrRangeCoverage :
|
||||
AddrRangeCoverage::FullAddrRangeCoverage;
|
||||
} else if (
|
||||
// This is the partial store-load forwarding case where a store
|
||||
// has only part of the load's data and the load isn't LLSC
|
||||
(!req->mainRequest()->isLLSC() &&
|
||||
((store_has_lower_limit && lower_load_has_store_part) ||
|
||||
(store_has_upper_limit && upper_load_has_store_part) ||
|
||||
(lower_load_has_store_part && upper_load_has_store_part))) ||
|
||||
// The load is LLSC, and the store has all or part of the
|
||||
// load's data
|
||||
(req->mainRequest()->isLLSC() &&
|
||||
((store_has_lower_limit || upper_load_has_store_part) &&
|
||||
(store_has_upper_limit || lower_load_has_store_part))) ||
|
||||
// The store entry is atomic and has all or part of the load's
|
||||
// data
|
||||
(store_it->instruction()->isAtomic() &&
|
||||
((store_has_lower_limit || upper_load_has_store_part) &&
|
||||
(store_has_upper_limit || lower_load_has_store_part)))) {
|
||||
|
||||
coverage = AddrRangeCoverage::PartialAddrRangeCoverage;
|
||||
}
|
||||
|
||||
if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) {
|
||||
// Get shift amount for offset into the store's data.
|
||||
int shift_amt = req->mainRequest()->getVaddr() -
|
||||
store_it->instruction()->effAddr;
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData =
|
||||
new uint8_t[req->mainRequest()->getSize()];
|
||||
}
|
||||
// Stores flagged as all-zeros forward zeros; otherwise the
// bytes are copied out of the store entry's data.
if (store_it->isAllZeros())
|
||||
memset(load_inst->memData, 0,
|
||||
req->mainRequest()->getSize());
|
||||
else
|
||||
memcpy(load_inst->memData,
|
||||
store_it->data() + shift_amt,
|
||||
req->mainRequest()->getSize());
|
||||
|
||||
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
|
||||
"addr %#x\n", store_it._idx,
|
||||
req->mainRequest()->getVaddr());
|
||||
|
||||
PacketPtr data_pkt = new Packet(req->mainRequest(),
|
||||
MemCmd::ReadReq);
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
// hardware transactional memory
|
||||
// Store to load forwarding within a transaction
|
||||
// This should be okay because the store will be sent to
|
||||
// the memory subsystem and subsequently get added to the
|
||||
// write set of the transaction. The write set has a stronger
|
||||
// property than the read set, so the load doesn't necessarily
|
||||
// have to be there.
|
||||
assert(!req->mainRequest()->isHTMCmd());
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
assert (!storeQueue[store_it._idx].completed());
|
||||
assert (
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
inHtmTransactionalState());
|
||||
assert (
|
||||
load_inst->getHtmTransactionUid() ==
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
getHtmTransactionUid());
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
|
||||
"pc=0x%lx - vaddr=0x%lx - "
|
||||
"paddr=0x%lx - htmUid=%u\n",
|
||||
load_inst->instAddr(),
|
||||
data_pkt->req->hasVaddr() ?
|
||||
data_pkt->req->getVaddr() : 0lu,
|
||||
data_pkt->getAddr(),
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
|
||||
if (req->isAnyOutstandingRequest()) {
|
||||
assert(req->_numOutstandingPackets > 0);
|
||||
// There are memory request packets in flight already.
|
||||
// This may happen if the store was not complete the
|
||||
// first time this load got executed. Signal the senderState
|
||||
// that response packets should be discarded.
|
||||
req->discardSenderState();
|
||||
}
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt,
|
||||
this);
|
||||
|
||||
// We'll say this has a 1 cycle load-store forwarding latency
|
||||
// for now.
|
||||
// @todo: Need to make this a parameter.
// NOTE(review): the event is scheduled at curTick(), i.e. with no
// added delay, despite the comment above -- confirm intent.
|
||||
cpu->schedule(wb, curTick());
|
||||
|
||||
// Don't need to do anything special for split loads.
|
||||
++stats.forwLoads;
|
||||
|
||||
return NoFault;
|
||||
} else if (
|
||||
coverage == AddrRangeCoverage::PartialAddrRangeCoverage) {
|
||||
// If it's already been written back, then don't worry about
|
||||
// stalling on it.
// NOTE(review): presumably panic() does not return, which would
// make the continue below unreachable -- confirm.
|
||||
if (store_it->completed()) {
|
||||
panic("Should not check one of these");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Must stall load and force it to retry, so long as it's the
|
||||
// oldest load that needs to do so.
|
||||
if (!stalled ||
|
||||
(stalled &&
|
||||
load_inst->seqNum <
|
||||
loadQueue[stallingLoadIdx].instruction()->seqNum)) {
|
||||
stalled = true;
|
||||
stallingStoreIsn = store_it->instruction()->seqNum;
|
||||
stallingLoadIdx = load_idx;
|
||||
}
|
||||
|
||||
// Tell IQ/mem dep unit that this instruction will need to be
|
||||
// rescheduled eventually
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
load_inst->clearIssued();
|
||||
load_inst->effAddrValid(false);
|
||||
++stats.rescheduledLoads;
|
||||
|
||||
// Do not generate a writeback event as this instruction is not
|
||||
// complete.
|
||||
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
|
||||
"Store idx %i to load addr %#x\n",
|
||||
store_it._idx, req->mainRequest()->getVaddr());
|
||||
|
||||
// Must discard the request.
|
||||
req->discard();
|
||||
load_req.setRequest(nullptr);
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there's no forwarding case, then go access memory
|
||||
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
|
||||
}
|
||||
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMCmd()) {
|
||||
// this is a simple sanity check
|
||||
// the Ruby cache controller will set
|
||||
// memData to 0x0ul if successful.
// NOTE(review): memData is a uint8_t buffer (see the allocation
// above), so this assignment stores the single byte value 1.
|
||||
*load_inst->memData = (uint64_t) 0x1ull;
|
||||
}
|
||||
|
||||
// For now, load throughput is constrained by the number of
|
||||
// load FUs only, and loads do not consume a cache port (only
|
||||
// stores do).
|
||||
// @todo We should account for cache port contention
|
||||
// and arbitrate between loads and stores.
|
||||
|
||||
// Attach a sender state if the request does not have one yet, then
// build the packets and try to send them to the cache.
|
||||
if (req->senderState() == nullptr) {
|
||||
LQSenderState *state = new LQSenderState(
|
||||
loadQueue.getIterator(load_idx));
|
||||
state->isLoad = true;
|
||||
state->inst = load_inst;
|
||||
state->isSplit = req->isSplit();
|
||||
req->senderState(state);
|
||||
}
|
||||
req->buildPackets();
|
||||
req->sendPacketToCache();
|
||||
// If the request could not be sent, have IEW block this load.
if (!req->isSent())
|
||||
iewStage->blockMemInst(load_inst);
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::write(LSQRequest *req, uint8_t *data, int store_idx)
|
||||
{
|
||||
assert(storeQueue[store_idx].valid());
|
||||
|
||||
DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i "
|
||||
"[sn:%llu]\n",
|
||||
store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1,
|
||||
storeQueue[store_idx].instruction()->seqNum);
|
||||
|
||||
storeQueue[store_idx].setRequest(req);
|
||||
unsigned size = req->_size;
|
||||
storeQueue[store_idx].size() = size;
|
||||
bool store_no_data =
|
||||
req->mainRequest()->getFlags() & Request::STORE_NO_DATA;
|
||||
storeQueue[store_idx].isAllZeros() = store_no_data;
|
||||
assert(size <= SQEntry::DataSize || store_no_data);
|
||||
|
||||
// copy data into the storeQueue only if the store request has valid data
|
||||
if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) &&
|
||||
!req->request()->isCacheMaintenance() &&
|
||||
!req->request()->isAtomic())
|
||||
memcpy(storeQueue[store_idx].data(), data, size);
|
||||
|
||||
// This function only writes the data to the store queue, so no fault
|
||||
// can happen here.
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
#endif//__CPU_O3_LSQ_UNIT_IMPL_HH__
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "debug/MemDepUnit.hh"
|
||||
|
||||
@@ -85,8 +86,6 @@ class MemDepUnit
|
||||
std::string _name;
|
||||
|
||||
public:
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::DynInstConstPtr DynInstConstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
/** Empty constructor. Must call init() prior to using in this case. */
|
||||
@@ -117,22 +116,22 @@ class MemDepUnit
|
||||
void setIQ(InstructionQueue<Impl> *iq_ptr);
|
||||
|
||||
/** Inserts a memory instruction. */
|
||||
void insert(const DynInstPtr &inst);
|
||||
void insert(const O3DynInstPtr &inst);
|
||||
|
||||
/** Inserts a non-speculative memory instruction. */
|
||||
void insertNonSpec(const DynInstPtr &inst);
|
||||
void insertNonSpec(const O3DynInstPtr &inst);
|
||||
|
||||
/** Inserts a barrier instruction. */
|
||||
void insertBarrier(const DynInstPtr &barr_inst);
|
||||
void insertBarrier(const O3DynInstPtr &barr_inst);
|
||||
|
||||
/** Indicate that an instruction has its registers ready. */
|
||||
void regsReady(const DynInstPtr &inst);
|
||||
void regsReady(const O3DynInstPtr &inst);
|
||||
|
||||
/** Indicate that a non-speculative instruction is ready. */
|
||||
void nonSpecInstReady(const DynInstPtr &inst);
|
||||
void nonSpecInstReady(const O3DynInstPtr &inst);
|
||||
|
||||
/** Reschedules an instruction to be re-executed. */
|
||||
void reschedule(const DynInstPtr &inst);
|
||||
void reschedule(const O3DynInstPtr &inst);
|
||||
|
||||
/** Replays all instructions that have been rescheduled by moving them to
|
||||
* the ready list.
|
||||
@@ -140,7 +139,7 @@ class MemDepUnit
|
||||
void replay();
|
||||
|
||||
/** Notifies completion of an instruction. */
|
||||
void completeInst(const DynInstPtr &inst);
|
||||
void completeInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Squashes all instructions up until a given sequence number for a
|
||||
* specific thread.
|
||||
@@ -148,11 +147,11 @@ class MemDepUnit
|
||||
void squash(const InstSeqNum &squashed_num, ThreadID tid);
|
||||
|
||||
/** Indicates an ordering violation between a store and a younger load. */
|
||||
void violation(const DynInstPtr &store_inst,
|
||||
const DynInstPtr &violating_load);
|
||||
void violation(const O3DynInstPtr &store_inst,
|
||||
const O3DynInstPtr &violating_load);
|
||||
|
||||
/** Issues the given instruction */
|
||||
void issue(const DynInstPtr &inst);
|
||||
void issue(const O3DynInstPtr &inst);
|
||||
|
||||
/** Debugging function to dump the lists of instructions. */
|
||||
void dumpLists();
|
||||
@@ -160,12 +159,12 @@ class MemDepUnit
|
||||
private:
|
||||
|
||||
/** Completes a memory instruction. */
|
||||
void completed(const DynInstPtr &inst);
|
||||
void completed(const O3DynInstPtr &inst);
|
||||
|
||||
/** Wakes any dependents of a memory instruction. */
|
||||
void wakeDependents(const DynInstPtr &inst);
|
||||
void wakeDependents(const O3DynInstPtr &inst);
|
||||
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
typedef typename std::list<O3DynInstPtr>::iterator ListIt;
|
||||
|
||||
class MemDepEntry;
|
||||
|
||||
@@ -179,7 +178,7 @@ class MemDepUnit
|
||||
{
|
||||
public:
|
||||
/** Constructs a memory dependence entry. */
|
||||
MemDepEntry(const DynInstPtr &new_inst)
|
||||
MemDepEntry(const O3DynInstPtr &new_inst)
|
||||
: inst(new_inst), regsReady(false), memDeps(0),
|
||||
completed(false), squashed(false)
|
||||
{
|
||||
@@ -209,7 +208,7 @@ class MemDepUnit
|
||||
std::string name() const { return "memdepentry"; }
|
||||
|
||||
/** The instruction being tracked. */
|
||||
DynInstPtr inst;
|
||||
O3DynInstPtr inst;
|
||||
|
||||
/** The iterator to the instruction's location inside the list. */
|
||||
ListIt listIt;
|
||||
@@ -235,10 +234,10 @@ class MemDepUnit
|
||||
};
|
||||
|
||||
/** Finds the memory dependence entry in the hash map. */
|
||||
inline MemDepEntryPtr &findInHash(const DynInstConstPtr& inst);
|
||||
MemDepEntryPtr &findInHash(const O3DynInstConstPtr& inst);
|
||||
|
||||
/** Moves an entry to the ready list. */
|
||||
inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
|
||||
void moveToReady(MemDepEntryPtr &ready_inst_entry);
|
||||
|
||||
typedef std::unordered_map<InstSeqNum, MemDepEntryPtr, SNHash> MemDepHash;
|
||||
|
||||
@@ -248,10 +247,10 @@ class MemDepUnit
|
||||
MemDepHash memDepHash;
|
||||
|
||||
/** A list of all instructions in the memory dependence unit. */
|
||||
std::list<DynInstPtr> instList[O3MaxThreads];
|
||||
std::list<O3DynInstPtr> instList[O3MaxThreads];
|
||||
|
||||
/** A list of all instructions that are going to be replayed. */
|
||||
std::list<DynInstPtr> instsToReplay;
|
||||
std::list<O3DynInstPtr> instsToReplay;
|
||||
|
||||
/** The memory dependence predictor. It is accessed upon new
|
||||
* instructions being added to the IQ, and responds by telling
|
||||
@@ -273,7 +272,7 @@ class MemDepUnit
|
||||
bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); }
|
||||
|
||||
/** Inserts the SN of a barrier inst. to the list of tracked barriers */
|
||||
void insertBarrierSN(const DynInstPtr &barr_inst);
|
||||
void insertBarrierSN(const O3DynInstPtr &barr_inst);
|
||||
|
||||
/** Pointer to the IQ. */
|
||||
InstructionQueue<Impl> *iqPtr;
|
||||
|
||||
@@ -172,7 +172,7 @@ MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const O3DynInstPtr &barr_inst)
|
||||
{
|
||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||
|
||||
@@ -205,7 +205,7 @@ MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::insert(const O3DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
@@ -316,7 +316,7 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const O3DynInstPtr &inst)
|
||||
{
|
||||
insertBarrier(inst);
|
||||
|
||||
@@ -338,7 +338,7 @@ MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrier(const O3DynInstPtr &barr_inst)
|
||||
{
|
||||
ThreadID tid = barr_inst->threadNumber;
|
||||
|
||||
@@ -361,7 +361,7 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::regsReady(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::regsReady(const O3DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Marking registers as ready for "
|
||||
"instruction PC %s [sn:%lli].\n",
|
||||
@@ -384,7 +384,7 @@ MemDepUnit<MemDepPred, Impl>::regsReady(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const O3DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Marking non speculative "
|
||||
"instruction PC %s as ready [sn:%lli].\n",
|
||||
@@ -397,7 +397,7 @@ MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::reschedule(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::reschedule(const O3DynInstPtr &inst)
|
||||
{
|
||||
instsToReplay.push_back(inst);
|
||||
}
|
||||
@@ -406,7 +406,7 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::replay()
|
||||
{
|
||||
DynInstPtr temp_inst;
|
||||
O3DynInstPtr temp_inst;
|
||||
|
||||
// For now this replay function replays all waiting memory ops.
|
||||
while (!instsToReplay.empty()) {
|
||||
@@ -425,7 +425,7 @@ MemDepUnit<MemDepPred, Impl>::replay()
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::completed(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::completed(const O3DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
@@ -449,7 +449,7 @@ MemDepUnit<MemDepPred, Impl>::completed(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::completeInst(const O3DynInstPtr &inst)
|
||||
{
|
||||
wakeDependents(inst);
|
||||
completed(inst);
|
||||
@@ -481,7 +481,7 @@ MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::wakeDependents(const O3DynInstPtr &inst)
|
||||
{
|
||||
// Only stores, atomics and barriers have dependents.
|
||||
if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() &&
|
||||
@@ -570,8 +570,8 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::violation(const DynInstPtr &store_inst,
|
||||
const DynInstPtr &violating_load)
|
||||
MemDepUnit<MemDepPred, Impl>::violation(const O3DynInstPtr &store_inst,
|
||||
const O3DynInstPtr &violating_load)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
|
||||
" load: %#x, store: %#x\n", violating_load->instAddr(),
|
||||
@@ -582,7 +582,7 @@ MemDepUnit<MemDepPred, Impl>::violation(const DynInstPtr &store_inst,
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::issue(const DynInstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::issue(const O3DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
|
||||
inst->instAddr(), inst->seqNum);
|
||||
@@ -592,7 +592,7 @@ MemDepUnit<MemDepPred, Impl>::issue(const DynInstPtr &inst)
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
|
||||
MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstConstPtr &inst)
|
||||
MemDepUnit<MemDepPred, Impl>::findInHash(const O3DynInstConstPtr &inst)
|
||||
{
|
||||
MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/reg_class.hh"
|
||||
#include "debug/ElasticTrace.hh"
|
||||
#include "mem/packet.hh"
|
||||
@@ -124,21 +125,21 @@ ElasticTrace::regEtraceListeners()
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace, RequestPtr>(this,
|
||||
"FetchRequest", &ElasticTrace::fetchReqTrace));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace,
|
||||
DynInstConstPtr>(this, "Execute",
|
||||
O3DynInstConstPtr>(this, "Execute",
|
||||
&ElasticTrace::recordExecTick));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace,
|
||||
DynInstConstPtr>(this, "ToCommit",
|
||||
O3DynInstConstPtr>(this, "ToCommit",
|
||||
&ElasticTrace::recordToCommTick));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace,
|
||||
DynInstConstPtr>(this, "Rename",
|
||||
O3DynInstConstPtr>(this, "Rename",
|
||||
&ElasticTrace::updateRegDep));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace, SeqNumRegPair>(this,
|
||||
"SquashInRename", &ElasticTrace::removeRegDepMapEntry));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace,
|
||||
DynInstConstPtr>(this, "Squash",
|
||||
O3DynInstConstPtr>(this, "Squash",
|
||||
&ElasticTrace::addSquashedInst));
|
||||
listeners.push_back(new ProbeListenerArg<ElasticTrace,
|
||||
DynInstConstPtr>(this, "Commit",
|
||||
O3DynInstConstPtr>(this, "Commit",
|
||||
&ElasticTrace::addCommittedInst));
|
||||
allProbesReg = true;
|
||||
}
|
||||
@@ -166,7 +167,7 @@ ElasticTrace::fetchReqTrace(const RequestPtr &req)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
|
||||
ElasticTrace::recordExecTick(const O3DynInstConstPtr& dyn_inst)
|
||||
{
|
||||
|
||||
// In a corner case, a retired instruction is propagated backward to the
|
||||
@@ -203,7 +204,7 @@ ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
|
||||
ElasticTrace::recordToCommTick(const O3DynInstConstPtr& dyn_inst)
|
||||
{
|
||||
// If tracing has just been enabled then the instruction at this stage of
|
||||
// execution is far enough that we cannot gather info about its past like
|
||||
@@ -224,7 +225,7 @@ ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
|
||||
ElasticTrace::updateRegDep(const O3DynInstConstPtr& dyn_inst)
|
||||
{
|
||||
// Get the sequence number of the instruction
|
||||
InstSeqNum seq_num = dyn_inst->seqNum;
|
||||
@@ -303,7 +304,7 @@ ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
|
||||
ElasticTrace::addSquashedInst(const O3DynInstConstPtr& head_inst)
|
||||
{
|
||||
// If the squashed instruction was squashed before being processed by
|
||||
// execute stage then it will not be in the temporary store. In this case
|
||||
@@ -331,7 +332,7 @@ ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
|
||||
ElasticTrace::addCommittedInst(const O3DynInstConstPtr& head_inst)
|
||||
{
|
||||
DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
|
||||
head_inst->seqNum);
|
||||
@@ -390,7 +391,7 @@ ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst)
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
|
||||
ElasticTrace::addDepTraceRecord(const O3DynInstConstPtr& head_inst,
|
||||
InstExecInfo* exec_info_ptr, bool commit)
|
||||
{
|
||||
// Create a record to assign dynamic intruction related fields.
|
||||
@@ -652,7 +653,7 @@ ElasticTrace::hasCompCompleted(TraceInfo* past_record,
|
||||
}
|
||||
|
||||
void
|
||||
ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst)
|
||||
ElasticTrace::clearTempStoreUntil(const O3DynInstConstPtr& head_inst)
|
||||
{
|
||||
// Clear from temp store starting with the execution info object
|
||||
// corresponding the head_inst and continue clearing by decrementing the
|
||||
|
||||
@@ -50,7 +50,7 @@
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "params/ElasticTrace.hh"
|
||||
@@ -85,8 +85,6 @@ class ElasticTrace : public ProbeListenerObject
|
||||
{
|
||||
|
||||
public:
|
||||
typedef typename O3CPUImpl::DynInstPtr DynInstPtr;
|
||||
typedef typename O3CPUImpl::DynInstConstPtr DynInstConstPtr;
|
||||
typedef typename std::pair<InstSeqNum, RegIndex> SeqNumRegPair;
|
||||
|
||||
/** Trace record types corresponding to instruction node types */
|
||||
@@ -129,7 +127,7 @@ class ElasticTrace : public ProbeListenerObject
|
||||
*
|
||||
* @param dyn_inst pointer to dynamic instruction in flight
|
||||
*/
|
||||
void recordExecTick(const DynInstConstPtr& dyn_inst);
|
||||
void recordExecTick(const O3DynInstConstPtr& dyn_inst);
|
||||
|
||||
/**
|
||||
* Populate the timestamp field in an InstExecInfo object for an
|
||||
@@ -138,7 +136,7 @@ class ElasticTrace : public ProbeListenerObject
|
||||
*
|
||||
* @param dyn_inst pointer to dynamic instruction in flight
|
||||
*/
|
||||
void recordToCommTick(const DynInstConstPtr& dyn_inst);
|
||||
void recordToCommTick(const O3DynInstConstPtr& dyn_inst);
|
||||
|
||||
/**
|
||||
* Record a Read After Write physical register dependency if there has
|
||||
@@ -149,7 +147,7 @@ class ElasticTrace : public ProbeListenerObject
|
||||
*
|
||||
* @param dyn_inst pointer to dynamic instruction in flight
|
||||
*/
|
||||
void updateRegDep(const DynInstConstPtr& dyn_inst);
|
||||
void updateRegDep(const O3DynInstConstPtr& dyn_inst);
|
||||
|
||||
/**
|
||||
* When an instruction gets squashed the destination register mapped to it
|
||||
@@ -166,14 +164,14 @@ class ElasticTrace : public ProbeListenerObject
|
||||
*
|
||||
* @param head_inst pointer to dynamic instruction to be squashed
|
||||
*/
|
||||
void addSquashedInst(const DynInstConstPtr& head_inst);
|
||||
void addSquashedInst(const O3DynInstConstPtr& head_inst);
|
||||
|
||||
/**
|
||||
* Add an instruction that is at the head of the ROB and is committed.
|
||||
*
|
||||
* @param head_inst pointer to dynamic instruction to be committed
|
||||
*/
|
||||
void addCommittedInst(const DynInstConstPtr& head_inst);
|
||||
void addCommittedInst(const O3DynInstConstPtr& head_inst);
|
||||
|
||||
/** Event to trigger registering this listener for all probe points. */
|
||||
EventFunctionWrapper regEtraceListenersEvent;
|
||||
@@ -379,7 +377,7 @@ class ElasticTrace : public ProbeListenerObject
|
||||
* @param exec_info_ptr Pointer to InstExecInfo for that instruction
|
||||
* @param commit True if instruction is committed, false if squashed
|
||||
*/
|
||||
void addDepTraceRecord(const DynInstConstPtr& head_inst,
|
||||
void addDepTraceRecord(const O3DynInstConstPtr& head_inst,
|
||||
InstExecInfo* exec_info_ptr, bool commit);
|
||||
|
||||
/**
|
||||
@@ -388,7 +386,7 @@ class ElasticTrace : public ProbeListenerObject
|
||||
*
|
||||
* @param head_inst pointer to dynamic instruction
|
||||
*/
|
||||
void clearTempStoreUntil(const DynInstConstPtr& head_inst);
|
||||
void clearTempStoreUntil(const O3DynInstConstPtr& head_inst);
|
||||
|
||||
/**
|
||||
* Calculate the computational delay between an instruction and a
|
||||
|
||||
@@ -38,16 +38,17 @@
|
||||
#include "cpu/o3/probe/simple_trace.hh"
|
||||
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "debug/SimpleTrace.hh"
|
||||
|
||||
void SimpleTrace::traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst)
|
||||
void SimpleTrace::traceCommit(const O3DynInstConstPtr& dynInst)
|
||||
{
|
||||
DPRINTFR(SimpleTrace, "[%s]: Commit 0x%08x %s.\n", name(),
|
||||
dynInst->instAddr(),
|
||||
dynInst->staticInst->disassemble(dynInst->instAddr()));
|
||||
}
|
||||
|
||||
void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst)
|
||||
void SimpleTrace::traceFetch(const O3DynInstConstPtr& dynInst)
|
||||
{
|
||||
DPRINTFR(SimpleTrace, "[%s]: Fetch 0x%08x %s.\n", name(),
|
||||
dynInst->instAddr(),
|
||||
@@ -57,7 +58,7 @@ void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst)
|
||||
void SimpleTrace::regProbeListeners()
|
||||
{
|
||||
typedef ProbeListenerArg<SimpleTrace,
|
||||
O3CPUImpl::DynInstConstPtr> DynInstListener;
|
||||
O3DynInstConstPtr> DynInstListener;
|
||||
listeners.push_back(new DynInstListener(this, "Commit",
|
||||
&SimpleTrace::traceCommit));
|
||||
listeners.push_back(new DynInstListener(this, "Fetch",
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
#ifndef __CPU_O3_PROBE_SIMPLE_TRACE_HH__
|
||||
#define __CPU_O3_PROBE_SIMPLE_TRACE_HH__
|
||||
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "params/SimpleTrace.hh"
|
||||
#include "sim/probe/probe.hh"
|
||||
@@ -69,8 +69,8 @@ class SimpleTrace : public ProbeListenerObject
|
||||
}
|
||||
|
||||
private:
|
||||
void traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst);
|
||||
void traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst);
|
||||
void traceFetch(const O3DynInstConstPtr& dynInst);
|
||||
void traceCommit(const O3DynInstConstPtr& dynInst);
|
||||
|
||||
};
|
||||
#endif//__CPU_O3_PROBE_SIMPLE_TRACE_HH__
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#include "base/statistics.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/free_list.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
@@ -73,7 +74,6 @@ class DefaultRename
|
||||
{
|
||||
public:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DecodeStruct DecodeStruct;
|
||||
typedef typename Impl::RenameStruct RenameStruct;
|
||||
@@ -83,7 +83,7 @@ class DefaultRename
|
||||
// be added to the front of the queue, which is the only reason for
|
||||
// using a deque instead of a queue. (Most other stages use a
|
||||
// queue)
|
||||
typedef std::deque<DynInstPtr> InstQueue;
|
||||
typedef std::deque<O3DynInstPtr> InstQueue;
|
||||
|
||||
public:
|
||||
/** Overall rename status. Used to determine if the CPU can
|
||||
@@ -117,7 +117,7 @@ class DefaultRename
|
||||
/** Probe points. */
|
||||
typedef typename std::pair<InstSeqNum, PhysRegIdPtr> SeqNumRegPair;
|
||||
/** To probe when register renaming for an instruction is complete */
|
||||
ProbePointArg<DynInstPtr> *ppRename;
|
||||
ProbePointArg<O3DynInstPtr> *ppRename;
|
||||
/**
|
||||
* To probe when an instruction is squashed and the register mapping
|
||||
* for it needs to be undone
|
||||
@@ -248,22 +248,22 @@ class DefaultRename
|
||||
void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid);
|
||||
|
||||
/** Renames the source registers of an instruction. */
|
||||
inline void renameSrcRegs(const DynInstPtr &inst, ThreadID tid);
|
||||
void renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Renames the destination registers of an instruction. */
|
||||
inline void renameDestRegs(const DynInstPtr &inst, ThreadID tid);
|
||||
void renameDestRegs(const O3DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Calculates the number of free ROB entries for a specific thread. */
|
||||
inline int calcFreeROBEntries(ThreadID tid);
|
||||
int calcFreeROBEntries(ThreadID tid);
|
||||
|
||||
/** Calculates the number of free IQ entries for a specific thread. */
|
||||
inline int calcFreeIQEntries(ThreadID tid);
|
||||
int calcFreeIQEntries(ThreadID tid);
|
||||
|
||||
/** Calculates the number of free LQ entries for a specific thread. */
|
||||
inline int calcFreeLQEntries(ThreadID tid);
|
||||
int calcFreeLQEntries(ThreadID tid);
|
||||
|
||||
/** Calculates the number of free SQ entries for a specific thread. */
|
||||
inline int calcFreeSQEntries(ThreadID tid);
|
||||
int calcFreeSQEntries(ThreadID tid);
|
||||
|
||||
/** Returns the number of valid instructions coming from decode. */
|
||||
unsigned validInsts();
|
||||
@@ -417,7 +417,7 @@ class DefaultRename
|
||||
Stalls stalls[O3MaxThreads];
|
||||
|
||||
/** The serialize instruction that rename has stalled on. */
|
||||
DynInstPtr serializeInst[O3MaxThreads];
|
||||
O3DynInstPtr serializeInst[O3MaxThreads];
|
||||
|
||||
/** Records if rename needs to serialize on the next instruction for any
|
||||
* thread.
|
||||
|
||||
@@ -177,7 +177,8 @@ template <class Impl>
|
||||
void
|
||||
DefaultRename<Impl>::regProbePoints()
|
||||
{
|
||||
ppRename = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Rename");
|
||||
ppRename = new ProbePointArg<O3DynInstPtr>(
|
||||
cpu->getProbeManager(), "Rename");
|
||||
ppSquashInRename = new ProbePointArg<SeqNumRegPair>(cpu->getProbeManager(),
|
||||
"SquashInRename");
|
||||
}
|
||||
@@ -612,11 +613,12 @@ DefaultRename<Impl>::renameInsts(ThreadID tid)
|
||||
|
||||
assert(!insts_to_rename.empty());
|
||||
|
||||
DynInstPtr inst = insts_to_rename.front();
|
||||
O3DynInstPtr inst = insts_to_rename.front();
|
||||
|
||||
//For all kind of instructions, check ROB and IQ first
|
||||
//For load instruction, check LQ size and take into account the inflight loads
|
||||
//For store instruction, check SQ size and take into account the inflight stores
|
||||
//For all kind of instructions, check ROB and IQ first For load
|
||||
//instruction, check LQ size and take into account the inflight loads
|
||||
//For store instruction, check SQ size and take into account the
|
||||
//inflight stores
|
||||
|
||||
if (inst->isLoad()) {
|
||||
if (calcFreeLQEntries(tid) <= 0) {
|
||||
@@ -774,7 +776,7 @@ template<class Impl>
|
||||
void
|
||||
DefaultRename<Impl>::skidInsert(ThreadID tid)
|
||||
{
|
||||
DynInstPtr inst = NULL;
|
||||
O3DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
@@ -811,7 +813,7 @@ DefaultRename<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_decode = fromDecode->size;
|
||||
for (int i = 0; i < insts_from_decode; ++i) {
|
||||
const DynInstPtr &inst = fromDecode->insts[i];
|
||||
const O3DynInstPtr &inst = fromDecode->insts[i];
|
||||
insts[inst->threadNumber].push_back(inst);
|
||||
#if TRACING_ON
|
||||
if (Debug::O3PipeView) {
|
||||
@@ -1035,7 +1037,7 @@ DefaultRename<Impl>::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
inline void
|
||||
DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
|
||||
DefaultRename<Impl>::renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
ThreadContext *tc = inst->tcBase();
|
||||
UnifiedRenameMap *map = renameMap[tid];
|
||||
@@ -1102,7 +1104,7 @@ DefaultRename<Impl>::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
inline void
|
||||
DefaultRename<Impl>::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
|
||||
DefaultRename<Impl>::renameDestRegs(const O3DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
ThreadContext *tc = inst->tcBase();
|
||||
UnifiedRenameMap *map = renameMap[tid];
|
||||
@@ -1369,7 +1371,7 @@ DefaultRename<Impl>::checkSignalsAndUpdate(ThreadID tid)
|
||||
DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to "
|
||||
"unblocking.\n", tid);
|
||||
|
||||
DynInstPtr serial_inst = serializeInst[tid];
|
||||
O3DynInstPtr serial_inst = serializeInst[tid];
|
||||
|
||||
renameStatus[tid] = Unblocking;
|
||||
|
||||
|
||||
@@ -60,10 +60,9 @@ class ROB
|
||||
public:
|
||||
//Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
typedef std::pair<RegIndex, RegIndex> UnmapInfo;
|
||||
typedef typename std::list<DynInstPtr>::iterator InstIt;
|
||||
typedef typename std::list<O3DynInstPtr>::iterator InstIt;
|
||||
|
||||
/** Possible ROB statuses. */
|
||||
enum Status
|
||||
@@ -105,36 +104,36 @@ class ROB
|
||||
* ROB for the new instruction.
|
||||
* @param inst The instruction being inserted into the ROB.
|
||||
*/
|
||||
void insertInst(const DynInstPtr &inst);
|
||||
void insertInst(const O3DynInstPtr &inst);
|
||||
|
||||
/** Returns pointer to the head instruction within the ROB. There is
|
||||
* no guarantee as to the return value if the ROB is empty.
|
||||
* @retval Pointer to the DynInst that is at the head of the ROB.
|
||||
*/
|
||||
// DynInstPtr readHeadInst();
|
||||
// O3DynInstPtr readHeadInst();
|
||||
|
||||
/** Returns a pointer to the head instruction of a specific thread within
|
||||
* the ROB.
|
||||
* @return Pointer to the DynInst that is at the head of the ROB.
|
||||
*/
|
||||
const DynInstPtr &readHeadInst(ThreadID tid);
|
||||
const O3DynInstPtr &readHeadInst(ThreadID tid);
|
||||
|
||||
/** Returns a pointer to the instruction with the given sequence if it is
|
||||
* in the ROB.
|
||||
*/
|
||||
DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
|
||||
O3DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
|
||||
|
||||
/** Returns pointer to the tail instruction within the ROB. There is
|
||||
* no guarantee as to the return value if the ROB is empty.
|
||||
* @retval Pointer to the DynInst that is at the tail of the ROB.
|
||||
*/
|
||||
// DynInstPtr readTailInst();
|
||||
// O3DynInstPtr readTailInst();
|
||||
|
||||
/** Returns a pointer to the tail instruction of a specific thread within
|
||||
* the ROB.
|
||||
* @return Pointer to the DynInst that is at the tail of the ROB.
|
||||
*/
|
||||
DynInstPtr readTailInst(ThreadID tid);
|
||||
O3DynInstPtr readTailInst(ThreadID tid);
|
||||
|
||||
/** Retires the head instruction, removing it from the ROB. */
|
||||
// void retireHead();
|
||||
@@ -277,7 +276,7 @@ class ROB
|
||||
unsigned maxEntries[O3MaxThreads];
|
||||
|
||||
/** ROB List of Instructions */
|
||||
std::list<DynInstPtr> instList[O3MaxThreads];
|
||||
std::list<O3DynInstPtr> instList[O3MaxThreads];
|
||||
|
||||
/** Number of instructions that can be squashed in a single cycle. */
|
||||
unsigned squashWidth;
|
||||
@@ -308,7 +307,7 @@ class ROB
|
||||
int numInstsInROB;
|
||||
|
||||
/** Dummy instruction returned if there are no insts left. */
|
||||
DynInstPtr dummyInst;
|
||||
O3DynInstPtr dummyInst;
|
||||
|
||||
private:
|
||||
/** The sequence number of the squashed instruction. */
|
||||
|
||||
@@ -200,7 +200,7 @@ ROB<Impl>::countInsts(ThreadID tid)
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::insertInst(const DynInstPtr &inst)
|
||||
ROB<Impl>::insertInst(const O3DynInstPtr &inst)
|
||||
{
|
||||
assert(inst);
|
||||
|
||||
@@ -246,7 +246,7 @@ ROB<Impl>::retireHead(ThreadID tid)
|
||||
// Get the head ROB instruction by copying it and remove it from the list
|
||||
InstIt head_it = instList[tid].begin();
|
||||
|
||||
DynInstPtr head_inst = std::move(*head_it);
|
||||
O3DynInstPtr head_inst = std::move(*head_it);
|
||||
instList[tid].erase(head_it);
|
||||
|
||||
assert(head_inst->readyToCommit());
|
||||
@@ -428,7 +428,7 @@ ROB<Impl>::updateHead()
|
||||
|
||||
InstIt head_thread = instList[tid].begin();
|
||||
|
||||
DynInstPtr head_inst = (*head_thread);
|
||||
O3DynInstPtr head_inst = (*head_thread);
|
||||
|
||||
assert(head_inst != 0);
|
||||
|
||||
@@ -513,7 +513,7 @@ ROB<Impl>::squash(InstSeqNum squash_num, ThreadID tid)
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
const typename Impl::DynInstPtr&
|
||||
const O3DynInstPtr&
|
||||
ROB<Impl>::readHeadInst(ThreadID tid)
|
||||
{
|
||||
if (threadEntries[tid] != 0) {
|
||||
@@ -528,7 +528,7 @@ ROB<Impl>::readHeadInst(ThreadID tid)
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
ROB<Impl>::readTailInst(ThreadID tid)
|
||||
{
|
||||
InstIt tail_thread = instList[tid].end();
|
||||
@@ -546,7 +546,7 @@ ROB<Impl>::ROBStats::ROBStats(Stats::Group *parent)
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
O3DynInstPtr
|
||||
ROB<Impl>::findInst(ThreadID tid, InstSeqNum squash_inst)
|
||||
{
|
||||
for (InstIt it = instList[tid].begin(); it != instList[tid].end(); it++) {
|
||||
|
||||
Reference in New Issue
Block a user