Updates for OzoneCPU.

cpu/static_inst.hh:
    Updates for new CPU, also include a classification of quiesce instructions.

--HG--
extra : convert_revision : a34cd56da88fe57d7de24674fbb375bbf13f887f
This commit is contained in:
Kevin Lim
2006-04-22 19:10:39 -04:00
parent bfc507e44e
commit 6b4396111b
19 changed files with 3619 additions and 48 deletions

View File

@@ -2,4 +2,4 @@
#include "cpu/ozone/back_end_impl.hh"
#include "cpu/ozone/ozone_impl.hh"
template class BackEnd<OzoneImpl>;
//template class BackEnd<OzoneImpl>;

View File

@@ -125,6 +125,7 @@ class BackEnd
InstList nonSpec;
InstList replayList;
ReadyInstQueue readyQueue;
public:
int size;
int numInsts;
int width;
@@ -321,6 +322,12 @@ class BackEnd
int numROBEntries;
int numInsts;
bool squashPending;
InstSeqNum squashSeqNum;
Addr squashNextPC;
Fault faultFromFetch;
private:
typedef typename std::list<DynInstPtr>::iterator InstListIt;

View File

@@ -100,6 +100,7 @@ BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
numInsts++;
inst_count[0]++;
if (!inst->isNonSpeculative()) {
DPRINTF(BE, "Instruction [sn:%lli] added to IQ\n", inst->seqNum);
if (inst->readyToIssue()) {
toBeScheduled.push_front(inst);
inst->iqIt = toBeScheduled.begin();
@@ -110,6 +111,7 @@ BackEnd<Impl>::InstQueue::insert(DynInstPtr &inst)
inst->iqItValid = true;
}
} else {
DPRINTF(BE, "Nonspeculative instruction [sn:%lli] added to IQ\n", inst->seqNum);
nonSpec.push_front(inst);
inst->iqIt = nonSpec.begin();
inst->iqItValid = true;
@@ -159,6 +161,8 @@ BackEnd<Impl>::InstQueue::scheduleNonSpec(const InstSeqNum &sn)
*/
DynInstPtr inst = nonSpec.back();
DPRINTF(BE, "Nonspeculative instruction [sn:%lli] scheduled\n", inst->seqNum);
assert(inst->seqNum == sn);
assert(find(NonSpec, inst->iqIt));
@@ -193,6 +197,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
InstListIt iq_end_it = iq.end();
while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
(*iq_it)->iqItValid = false;
iq.erase(iq_it++);
--numInsts;
@@ -202,6 +207,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
iq_end_it = nonSpec.end();
while (iq_it != iq_end_it && (*iq_it)->seqNum > sn) {
DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
(*iq_it)->iqItValid = false;
nonSpec.erase(iq_it++);
--numInsts;
@@ -212,6 +218,7 @@ BackEnd<Impl>::InstQueue::squash(const InstSeqNum &sn)
while (iq_it != iq_end_it) {
if ((*iq_it)->seqNum > sn) {
DPRINTF(BE, "Instruction [sn:%lli] removed from IQ\n", (*iq_it)->seqNum);
(*iq_it)->iqItValid = false;
replayList.erase(iq_it++);
--numInsts;
@@ -243,20 +250,24 @@ BackEnd<Impl>::InstQueue::wakeDependents(DynInstPtr &inst)
std::vector<DynInstPtr> &dependents = inst->getDependents();
int num_outputs = dependents.size();
DPRINTF(BE, "Waking instruction [sn:%lli] dependents in IQ\n", inst->seqNum);
for (int i = 0; i < num_outputs; i++) {
DynInstPtr inst = dependents[i];
inst->markSrcRegReady();
if (inst->readyToIssue() && inst->iqItValid) {
if (inst->isNonSpeculative()) {
assert(find(NonSpec, inst->iqIt));
nonSpec.erase(inst->iqIt);
DynInstPtr dep_inst = dependents[i];
dep_inst->markSrcRegReady();
DPRINTF(BE, "Marking source reg ready [sn:%lli] in IQ\n", dep_inst->seqNum);
if (dep_inst->readyToIssue() && dep_inst->iqItValid) {
if (dep_inst->isNonSpeculative()) {
assert(find(NonSpec, dep_inst->iqIt));
nonSpec.erase(dep_inst->iqIt);
} else {
assert(find(IQ, inst->iqIt));
iq.erase(inst->iqIt);
assert(find(IQ, dep_inst->iqIt));
iq.erase(dep_inst->iqIt);
}
toBeScheduled.push_front(inst);
inst->iqIt = toBeScheduled.begin();
toBeScheduled.push_front(dep_inst);
dep_inst->iqIt = toBeScheduled.begin();
}
}
return num_outputs;
@@ -266,6 +277,7 @@ template <class Impl>
void
BackEnd<Impl>::InstQueue::rescheduleMemInst(DynInstPtr &inst)
{
DPRINTF(BE, "Rescheduling memory instruction [sn:%lli]\n", inst->seqNum);
assert(!inst->iqItValid);
replayList.push_front(inst);
inst->iqIt = replayList.begin();
@@ -277,11 +289,14 @@ template <class Impl>
void
BackEnd<Impl>::InstQueue::replayMemInst(DynInstPtr &inst)
{
DPRINTF(BE, "Replaying memory instruction [sn:%lli]\n", inst->seqNum);
assert(find(ReplayList, inst->iqIt));
InstListIt iq_it = --replayList.end();
InstListIt iq_end_it = replayList.end();
while (iq_it != iq_end_it) {
DynInstPtr rescheduled_inst = (*iq_it);
DPRINTF(BE, "Memory instruction [sn:%lli] also replayed\n", inst->seqNum);
replayList.erase(iq_it--);
toBeScheduled.push_front(rescheduled_inst);
rescheduled_inst->iqIt = toBeScheduled.begin();
@@ -952,6 +967,9 @@ BackEnd<Impl>::tick()
commitInsts();
DPRINTF(BE, "IQ entries in use: %i, ROB entries in use: %i, LSQ loads: %i, LSQ stores: %i\n",
IQ.numInsts, numInsts, LSQ.numLoads(), LSQ.numStores());
assert(numInsts == instList.size());
}
@@ -1034,11 +1052,11 @@ BackEnd<Impl>::dispatchInsts()
// Get instruction from front of time buffer
DynInstPtr inst = dispatch.front();
dispatch.pop_front();
--dispatchSize;
if (inst->isSquashed())
continue;
--dispatchSize;
++numInsts;
instList.push_back(inst);
@@ -1118,6 +1136,7 @@ template <class Impl>
void
BackEnd<Impl>::checkDispatchStatus()
{
DPRINTF(BE, "Checking dispatch status\n");
assert(dispatchStatus == Blocked);
if (!IQ.isFull() && !LSQ.isFull() && !isFull()) {
DPRINTF(BE, "Dispatch no longer blocked\n");
@@ -1526,6 +1545,24 @@ BackEnd<Impl>::commitInst(int inst_num)
// Write the done sequence number here.
toIEW->doneSeqNum = inst->seqNum;
#if FULL_SYSTEM
int count = 0;
Addr oldpc;
do {
if (count == 0)
assert(!thread->inSyscall && !thread->trapPending);
oldpc = thread->readPC();
cpu->system->pcEventQueue.service(
thread->getXCProxy());
count++;
} while (oldpc != thread->readPC());
if (count > 1) {
DPRINTF(BE, "PC skip function event, stopping commit\n");
// completed_last_inst = false;
// squashPending = true;
return false;
}
#endif
return true;
}
@@ -1566,7 +1603,11 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
while (insts_it != dispatch_end && (*insts_it)->seqNum > sn)
{
DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
if ((*insts_it)->isSquashed()) {
--insts_it;
continue;
}
DPRINTF(BE, "Squashing instruction on dispatch list PC %#x, [sn:%lli].\n",
(*insts_it)->readPC(),
(*insts_it)->seqNum);
@@ -1576,9 +1617,12 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
(*insts_it)->setCanCommit();
// Be careful with IPRs and such here
for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
renameTable[(*insts_it)->destRegIdx(i)] =
(*insts_it)->getPrevDestInst(i);
DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
(int)(*insts_it)->destRegIdx(i), prev_dest);
renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
++freed_regs;
}
@@ -1592,7 +1636,11 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
while (!instList.empty() && (*insts_it)->seqNum > sn)
{
DPRINTF(BE, "Squashing instruction PC %#x, [sn:%lli].\n",
if ((*insts_it)->isSquashed()) {
--insts_it;
continue;
}
DPRINTF(BE, "Squashing instruction on inst list PC %#x, [sn:%lli].\n",
(*insts_it)->readPC(),
(*insts_it)->seqNum);
@@ -1603,8 +1651,10 @@ BackEnd<Impl>::squash(const InstSeqNum &sn)
(*insts_it)->setCanCommit();
for (int i = 0; i < (*insts_it)->numDestRegs(); ++i) {
renameTable[(*insts_it)->destRegIdx(i)] =
(*insts_it)->getPrevDestInst(i);
DynInstPtr prev_dest = (*insts_it)->getPrevDestInst(i);
DPRINTF(BE, "Commit rename map setting register %i to [sn:%lli]\n",
(int)(*insts_it)->destRegIdx(i), prev_dest);
renameTable[(*insts_it)->destRegIdx(i)] = prev_dest;
++freed_regs;
}
@@ -1649,6 +1699,7 @@ template <class Impl>
void
BackEnd<Impl>::fetchFault(Fault &fault)
{
faultFromFetch = fault;
}
template <class Impl>

View File

@@ -42,6 +42,7 @@
#include "cpu/pc_event.hh"
#include "cpu/static_inst.hh"
#include "mem/mem_interface.hh"
#include "mem/page_table.hh"
#include "sim/eventq.hh"
// forward declarations
@@ -427,34 +428,22 @@ class OzoneCPU : public BaseCPU
int getInstAsid() { return thread.asid; }
int getDataAsid() { return thread.asid; }
Fault dummyTranslation(MemReqPtr &req)
{
#if 0
assert((req->vaddr >> 48 & 0xffff) == 0);
#endif
// put the asid in the upper 16 bits of the paddr
req->paddr = req->vaddr & ~((Addr)0xffff << sizeof(Addr) * 8 - 16);
req->paddr = req->paddr | (Addr)req->asid << sizeof(Addr) * 8 - 16;
return NoFault;
}
/** Translates instruction requestion in syscall emulation mode. */
Fault translateInstReq(MemReqPtr &req)
{
return dummyTranslation(req);
return this->pTable->translate(req);
}
/** Translates data read request in syscall emulation mode. */
Fault translateDataReadReq(MemReqPtr &req)
{
return dummyTranslation(req);
return this->pTable->translate(req);
}
/** Translates data write request in syscall emulation mode. */
Fault translateDataWriteReq(MemReqPtr &req)
{
return dummyTranslation(req);
return this->pTable->translate(req);
}
#endif
/** CPU read function, forwards read to LSQ. */
@@ -500,6 +489,7 @@ class OzoneCPU : public BaseCPU
bool inPalMode() { return AlphaISA::PcPAL(thread.PC); }
bool inPalMode(Addr pc) { return AlphaISA::PcPAL(pc); }
bool simPalCheck(int palFunc);
void processInterrupts();
#else
void syscall();
void setSyscallReturn(SyscallReturn return_value, int tid);

View File

@@ -45,7 +45,7 @@ SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
#else
SimObjectVectorParam<Process *> workload;
//SimObjectParam<PageTable *> page_table;
SimObjectParam<PageTable *> page_table;
#endif // FULL_SYSTEM
SimObjectParam<FunctionalMemory *> mem;
@@ -159,7 +159,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
INIT_PARAM(dtb, "Data translation buffer"),
#else
INIT_PARAM(workload, "Processes to run"),
// INIT_PARAM(page_table, "Page table"),
INIT_PARAM(page_table, "Page table"),
#endif // FULL_SYSTEM
INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -310,7 +310,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
params->dtb = dtb;
#else
params->workload = workload;
// params->pTable = page_table;
params->pTable = page_table;
#endif // FULL_SYSTEM
params->mem = mem;
@@ -440,7 +440,7 @@ SimObjectParam<AlphaITB *> itb;
SimObjectParam<AlphaDTB *> dtb;
#else
SimObjectVectorParam<Process *> workload;
//SimObjectParam<PageTable *> page_table;
SimObjectParam<PageTable *> page_table;
#endif // FULL_SYSTEM
SimObjectParam<FunctionalMemory *> mem;
@@ -554,7 +554,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
INIT_PARAM(dtb, "Data translation buffer"),
#else
INIT_PARAM(workload, "Processes to run"),
// INIT_PARAM(page_table, "Page table"),
INIT_PARAM(page_table, "Page table"),
#endif // FULL_SYSTEM
INIT_PARAM_DFLT(mem, "Memory", NULL),
@@ -705,7 +705,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
params->dtb = dtb;
#else
params->workload = workload;
// params->pTable = page_table;
params->pTable = page_table;
#endif // FULL_SYSTEM
params->mem = mem;

View File

@@ -765,7 +765,7 @@ void
OzoneCPU<Impl>::squashFromXC()
{
thread.inSyscall = true;
backEnd->squashFromXC();
backEnd->generateXCEvent();
}
#if !FULL_SYSTEM
@@ -832,6 +832,58 @@ OzoneCPU<Impl>::hwrei()
return NoFault;
}
template <class Impl>
void
OzoneCPU<Impl>::processInterrupts()
{
// Check for interrupts here. For now can copy the code that
// exists within isa_fullsys_traits.hh. Also assume that thread 0
// is the one that handles the interrupts.
// Check if there are any outstanding interrupts
//Handle the interrupts
int ipl = 0;
int summary = 0;
checkInterrupts = false;
if (thread.readMiscReg(IPR_ASTRR))
panic("asynchronous traps not implemented\n");
if (thread.readMiscReg(IPR_SIRR)) {
for (int i = INTLEVEL_SOFTWARE_MIN;
i < INTLEVEL_SOFTWARE_MAX; i++) {
if (thread.readMiscReg(IPR_SIRR) & (ULL(1) << i)) {
// See table 4-19 of the 21164 hardware reference
ipl = (i - INTLEVEL_SOFTWARE_MIN) + 1;
summary |= (ULL(1) << i);
}
}
}
uint64_t interrupts = intr_status();
if (interrupts) {
for (int i = INTLEVEL_EXTERNAL_MIN;
i < INTLEVEL_EXTERNAL_MAX; i++) {
if (interrupts & (ULL(1) << i)) {
// See table 4-19 of the 21164 hardware reference
ipl = i;
summary |= (ULL(1) << i);
}
}
}
if (ipl && ipl > thread.readMiscReg(IPR_IPLR)) {
thread.setMiscReg(IPR_ISR, summary);
thread.setMiscReg(IPR_INTID, ipl);
Fault fault = new InterruptFault;
fault->invoke(thread.getXCProxy());
DPRINTF(Flow, "Interrupt! IPLR=%d ipl=%d summary=%x\n",
thread.readMiscReg(IPR_IPLR), ipl, summary);
}
}
template <class Impl>
bool
OzoneCPU<Impl>::simPalCheck(int palFunc)

View File

@@ -76,6 +76,10 @@ class FrontEnd
bool processBarriers(DynInstPtr &inst);
void handleFault(Fault &fault);
public:
Fault getFault() { return fetchFault; }
private:
Fault fetchFault;
// Align an address (typically a PC) to the start of an I-cache block.
// We fold in the PISA 64- to 32-bit conversion here as well.

View File

@@ -48,6 +48,7 @@ FrontEnd<Impl>::FrontEnd(Params *params)
#if !FULL_SYSTEM
pTable = params->pTable;
#endif
fetchFault = NoFault;
}
template <class Impl>
@@ -273,6 +274,7 @@ FrontEnd<Impl>::tick()
Fault fault = fetchCacheLine();
if (fault != NoFault) {
handleFault(fault);
fetchFault = fault;
return;
}
fetchCacheLineNextCycle = false;
@@ -349,7 +351,7 @@ FrontEnd<Impl>::fetchCacheLine()
// Read a cache line, based on the current PC.
#if FULL_SYSTEM
// Flag to say whether or not address is physical addr.
unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
unsigned flags = cpu->inPalMode(PC) ? PHYSICAL : 0;
#else
unsigned flags = 0;
#endif // FULL_SYSTEM
@@ -503,6 +505,9 @@ FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
squash_num, next_PC);
if (fetchFault != NoFault)
fetchFault = NoFault;
while (!instBuffer.empty() &&
instBuffer.back()->seqNum > squash_num) {
DynInstPtr inst = instBuffer.back();
@@ -604,9 +609,13 @@ FrontEnd<Impl>::addFreeRegs(int num_freed)
status = Running;
}
DPRINTF(FE, "Adding %i freed registers\n", num_freed);
freeRegs+= num_freed;
assert(freeRegs <= numPhysRegs);
// assert(freeRegs <= numPhysRegs);
if (freeRegs > numPhysRegs)
freeRegs = numPhysRegs;
}
template <class Impl>

View File

@@ -54,6 +54,7 @@ class InorderBackEnd
void squash(const InstSeqNum &squash_num, const Addr &next_PC);
void squashFromXC();
void generateXCEvent() { }
bool robEmpty() { return instList.empty(); }

View File

@@ -567,8 +567,11 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
req->data = new uint8_t[64];
assert(!req->completionEvent);
req->completionEvent =
new typename BackEnd::LdWritebackEvent(loadQueue[load_idx], be);
typedef typename BackEnd::LdWritebackEvent LdWritebackEvent;
LdWritebackEvent *wb = new LdWritebackEvent(loadQueue[load_idx], be);
req->completionEvent = wb;
// Do Cache Access
MemAccessResult result = dcacheInterface->access(req);
@@ -586,6 +589,8 @@ OzoneLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
_status = DcacheMissStall;
wb->setDcacheMiss();
} else {
// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
// inst->seqNum);

View File

@@ -698,7 +698,7 @@ OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
stallingLoadIdx = 0;
}
loadQueue[load_idx]->squashed = true;
// loadQueue[load_idx]->squashed = true;
loadQueue[load_idx] = NULL;
--loads;
@@ -728,7 +728,7 @@ OzoneLSQ<Impl>::squash(const InstSeqNum &squashed_num)
stallingStoreIsn = 0;
}
storeQueue[store_idx].inst->squashed = true;
// storeQueue[store_idx].inst->squashed = true;
storeQueue[store_idx].inst = NULL;
storeQueue[store_idx].canWB = 0;

5
cpu/ozone/lw_back_end.cc Normal file
View File

@@ -0,0 +1,5 @@
#include "cpu/ozone/lw_back_end_impl.hh"
#include "cpu/ozone/ozone_impl.hh"
template class LWBackEnd<OzoneImpl>;

503
cpu/ozone/lw_back_end.hh Normal file
View File

@@ -0,0 +1,503 @@
#ifndef __CPU_OZONE_LW_BACK_END_HH__
#define __CPU_OZONE_LW_BACK_END_HH__
#include <list>
#include <queue>
#include <set>
#include <string>
#include "arch/faults.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/ozone/rename_table.hh"
#include "cpu/ozone/thread_state.hh"
#include "mem/functional/functional.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "sim/eventq.hh"
class ExecContext;
template <class Impl>
class OzoneThreadState;
template <class Impl>
class LWBackEnd
{
public:
typedef OzoneThreadState<Impl> Thread;
typedef typename Impl::Params Params;
typedef typename Impl::DynInst DynInst;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::FrontEnd FrontEnd;
typedef typename Impl::FullCPU::CommStruct CommStruct;
struct SizeStruct {
int size;
};
typedef SizeStruct DispatchToIssue;
typedef SizeStruct IssueToExec;
typedef SizeStruct ExecToCommit;
typedef SizeStruct Writeback;
TimeBuffer<DispatchToIssue> d2i;
typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
TimeBuffer<IssueToExec> i2e;
typename TimeBuffer<IssueToExec>::wire instsToExecute;
TimeBuffer<ExecToCommit> e2c;
TimeBuffer<Writeback> numInstsToWB;
TimeBuffer<CommStruct> *comm;
typename TimeBuffer<CommStruct>::wire toIEW;
typename TimeBuffer<CommStruct>::wire fromCommit;
class TrapEvent : public Event {
private:
LWBackEnd<Impl> *be;
public:
TrapEvent(LWBackEnd<Impl> *_be);
void process();
const char *description();
};
/** LdWriteback event for a load completion. */
class LdWritebackEvent : public Event {
private:
/** Instruction that is writing back data to the register file. */
DynInstPtr inst;
/** Pointer to IEW stage. */
LWBackEnd *be;
bool dcacheMiss;
public:
/** Constructs a load writeback event. */
LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);
/** Processes writeback event. */
virtual void process();
/** Returns the description of the writeback event. */
virtual const char *description();
void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
};
LWBackEnd(Params *params);
std::string name() const;
void regStats();
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
void setFrontEnd(FrontEnd *front_end_ptr)
{ frontEnd = front_end_ptr; }
void setXC(ExecContext *xc_ptr)
{ xc = xc_ptr; }
void setThreadState(Thread *thread_ptr)
{ thread = thread_ptr; }
void setCommBuffer(TimeBuffer<CommStruct> *_comm);
void tick();
void squash();
void generateXCEvent() { xcSquash = true; }
void squashFromXC();
void squashFromTrap();
void checkInterrupts();
bool trapSquash;
bool xcSquash;
template <class T>
Fault read(MemReqPtr &req, T &data, int load_idx);
template <class T>
Fault write(MemReqPtr &req, T &data, int store_idx);
Addr readCommitPC() { return commitPC; }
Addr commitPC;
bool robEmpty() { return instList.empty(); }
bool isFull() { return numInsts >= numROBEntries; }
bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }
void fetchFault(Fault &fault);
int wakeDependents(DynInstPtr &inst);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
*/
void rescheduleMemInst(DynInstPtr &inst);
/** Re-executes all rescheduled memory instructions. */
void replayMemInst(DynInstPtr &inst);
/** Completes memory instruction. */
void completeMemInst(DynInstPtr &inst) { }
void addDcacheMiss(DynInstPtr &inst)
{
waitingMemOps.insert(inst->seqNum);
numWaitingMemOps++;
DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
inst->seqNum, numWaitingMemOps);
}
void removeDcacheMiss(DynInstPtr &inst)
{
assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
waitingMemOps.erase(inst->seqNum);
numWaitingMemOps--;
DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
inst->seqNum, numWaitingMemOps);
}
void addWaitingMemOp(DynInstPtr &inst)
{
waitingMemOps.insert(inst->seqNum);
numWaitingMemOps++;
DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
inst->seqNum, numWaitingMemOps);
}
void removeWaitingMemOp(DynInstPtr &inst)
{
assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
waitingMemOps.erase(inst->seqNum);
numWaitingMemOps--;
DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
inst->seqNum, numWaitingMemOps);
}
void instToCommit(DynInstPtr &inst);
private:
void generateTrapEvent(Tick latency = 0);
void handleFault(Fault &fault, Tick latency = 0);
void updateStructures();
void dispatchInsts();
void dispatchStall();
void checkDispatchStatus();
void executeInsts();
void commitInsts();
void addToLSQ(DynInstPtr &inst);
void writebackInsts();
bool commitInst(int inst_num);
void squash(const InstSeqNum &sn);
void squashDueToBranch(DynInstPtr &inst);
void squashDueToMemViolation(DynInstPtr &inst);
void squashDueToMemBlocked(DynInstPtr &inst);
void updateExeInstStats(DynInstPtr &inst);
void updateComInstStats(DynInstPtr &inst);
public:
FullCPU *cpu;
FrontEnd *frontEnd;
ExecContext *xc;
Thread *thread;
enum Status {
Running,
Idle,
DcacheMissStall,
DcacheMissComplete,
Blocked,
TrapPending
};
Status status;
Status dispatchStatus;
Status commitStatus;
Counter funcExeInst;
private:
// typedef typename Impl::InstQueue InstQueue;
// InstQueue IQ;
typedef typename Impl::LdstQueue LdstQueue;
LdstQueue LSQ;
public:
RenameTable<Impl> commitRenameTable;
RenameTable<Impl> renameTable;
private:
class DCacheCompletionEvent : public Event
{
private:
LWBackEnd *be;
public:
DCacheCompletionEvent(LWBackEnd *_be);
virtual void process();
virtual const char *description();
};
friend class DCacheCompletionEvent;
DCacheCompletionEvent cacheCompletionEvent;
MemInterface *dcacheInterface;
MemReqPtr memReq;
// General back end width. Used if the more specific isn't given.
int width;
// Dispatch width.
int dispatchWidth;
int numDispatchEntries;
int dispatchSize;
int waitingInsts;
int issueWidth;
// Writeback width
int wbWidth;
// Commit width
int commitWidth;
/** Index into queue of instructions being written back. */
unsigned wbNumInst;
/** Cycle number within the queue of instructions being written
* back. Used in case there are too many instructions writing
* back at the current cycle and writesbacks need to be scheduled
* for the future. See comments in instToCommit().
*/
unsigned wbCycle;
int numROBEntries;
int numInsts;
std::set<InstSeqNum> waitingMemOps;
typedef std::set<InstSeqNum>::iterator MemIt;
int numWaitingMemOps;
unsigned maxOutstandingMemOps;
bool squashPending;
InstSeqNum squashSeqNum;
Addr squashNextPC;
Fault faultFromFetch;
bool fetchHasFault;
private:
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
{
return lhs->seqNum > rhs->seqNum;
}
};
typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
ReadyInstQueue exeList;
typedef typename std::list<DynInstPtr>::iterator InstListIt;
std::list<DynInstPtr> instList;
std::list<DynInstPtr> waitingList;
std::list<DynInstPtr> replayList;
std::list<DynInstPtr> writeback;
int latency;
int squashLatency;
bool exactFullStall;
bool fetchRedirect[Impl::MaxThreads];
// number of cycles stalled for D-cache misses
/* Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
*/
Stats::Vector<> rob_cap_events;
Stats::Vector<> rob_cap_inst_count;
Stats::Vector<> iq_cap_events;
Stats::Vector<> iq_cap_inst_count;
// total number of instructions executed
Stats::Vector<> exe_inst;
Stats::Vector<> exe_swp;
Stats::Vector<> exe_nop;
Stats::Vector<> exe_refs;
Stats::Vector<> exe_loads;
Stats::Vector<> exe_branches;
Stats::Vector<> issued_ops;
// total number of loads forwaded from LSQ stores
Stats::Vector<> lsq_forw_loads;
// total number of loads ignored due to invalid addresses
Stats::Vector<> inv_addr_loads;
// total number of software prefetches ignored due to invalid addresses
Stats::Vector<> inv_addr_swpfs;
// ready loads blocked due to memory disambiguation
Stats::Vector<> lsq_blocked_loads;
Stats::Scalar<> lsqInversion;
Stats::Vector<> n_issued_dist;
Stats::VectorDistribution<> issue_delay_dist;
Stats::VectorDistribution<> queue_res_dist;
/*
Stats::Vector<> stat_fu_busy;
Stats::Vector2d<> stat_fuBusy;
Stats::Vector<> dist_unissued;
Stats::Vector2d<> stat_issued_inst_type;
Stats::Formula misspec_cnt;
Stats::Formula misspec_ipc;
Stats::Formula issue_rate;
Stats::Formula issue_stores;
Stats::Formula issue_op_rate;
Stats::Formula fu_busy_rate;
Stats::Formula commit_stores;
Stats::Formula commit_ipc;
Stats::Formula commit_ipb;
Stats::Formula lsq_inv_rate;
*/
Stats::Vector<> writeback_count;
Stats::Vector<> producer_inst;
Stats::Vector<> consumer_inst;
Stats::Vector<> wb_penalized;
Stats::Formula wb_rate;
Stats::Formula wb_fanout;
Stats::Formula wb_penalized_rate;
// total number of instructions committed
Stats::Vector<> stat_com_inst;
Stats::Vector<> stat_com_swp;
Stats::Vector<> stat_com_refs;
Stats::Vector<> stat_com_loads;
Stats::Vector<> stat_com_membars;
Stats::Vector<> stat_com_branches;
Stats::Distribution<> n_committed_dist;
Stats::Scalar<> commit_eligible_samples;
Stats::Vector<> commit_eligible;
Stats::Scalar<> ROB_fcount;
Stats::Formula ROB_full_rate;
Stats::Vector<> ROB_count; // cumulative ROB occupancy
Stats::Formula ROB_occ_rate;
Stats::VectorDistribution<> ROB_occ_dist;
public:
void dumpInsts();
};
template <class Impl>
template <class T>
Fault
LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
/* memReq->reset(addr, sizeof(T), flags);
// translate to physical address
Fault fault = cpu->translateDataReadReq(memReq);
// if we have a cache, do cache access too
if (fault == NoFault && dcacheInterface) {
memReq->cmd = Read;
memReq->completionEvent = NULL;
memReq->time = curTick;
memReq->flags &= ~INST_READ;
MemAccessResult result = dcacheInterface->access(memReq);
// Ugly hack to get an event scheduled *only* if the access is
// a miss. We really should add first-class support for this
// at some point.
if (result != MA_HIT && dcacheInterface->doEvents()) {
// Fix this hack for keeping funcExeInst correct with loads that
// are executed twice.
--funcExeInst;
memReq->completionEvent = &cacheCompletionEvent;
lastDcacheStall = curTick;
// unscheduleTickEvent();
// status = DcacheMissStall;
DPRINTF(OzoneCPU, "Dcache miss stall!\n");
} else {
// do functional access
fault = thread->mem->read(memReq, data);
}
}
*/
/*
if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
recordEvent("Uncached Read");
*/
return LSQ.read(req, data, load_idx);
}
template <class Impl>
template <class T>
Fault
LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
{
/*
memReq->reset(addr, sizeof(T), flags);
// translate to physical address
Fault fault = cpu->translateDataWriteReq(memReq);
if (fault == NoFault && dcacheInterface) {
memReq->cmd = Write;
memcpy(memReq->data,(uint8_t *)&data,memReq->size);
memReq->completionEvent = NULL;
memReq->time = curTick;
memReq->flags &= ~INST_READ;
MemAccessResult result = dcacheInterface->access(memReq);
// Ugly hack to get an event scheduled *only* if the access is
// a miss. We really should add first-class support for this
// at some point.
if (result != MA_HIT && dcacheInterface->doEvents()) {
memReq->completionEvent = &cacheCompletionEvent;
lastDcacheStall = curTick;
// unscheduleTickEvent();
// status = DcacheMissStall;
DPRINTF(OzoneCPU, "Dcache miss stall!\n");
}
}
if (res && (fault == NoFault))
*res = memReq->result;
*/
/*
if (!dcacheInterface && (memReq->flags & UNCACHEABLE))
recordEvent("Uncached Write");
*/
return LSQ.write(req, data, store_idx);
}
#endif // __CPU_OZONE_LW_BACK_END_HH__

File diff suppressed because it is too large Load Diff

34
cpu/ozone/lw_lsq.cc Normal file
View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu/ozone/ozone_impl.hh"
#include "cpu/ozone/lw_lsq_impl.hh"
// Force the instantiation of LDSTQ for all the implementations we care about.
template class OzoneLWLSQ<OzoneImpl>;

649
cpu/ozone/lw_lsq.hh Normal file
View File

@@ -0,0 +1,649 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_OZONE_LW_LSQ_HH__
#define __CPU_OZONE_LW_LSQ_HH__
#include <list>
#include <map>
#include <queue>
#include <algorithm>
#include "arch/faults.hh"
#include "arch/isa_traits.hh"
#include "config/full_system.hh"
#include "base/hashmap.hh"
#include "cpu/inst_seq.hh"
#include "mem/mem_interface.hh"
//#include "mem/page_table.hh"
#include "sim/sim_object.hh"
class PageTable;
/**
* Class that implements the actual LQ and SQ for each specific thread.
* Both are circular queues; load entries are freed upon committing, while
* store entries are freed once they writeback. The LSQUnit tracks if there
* are memory ordering violations, and also detects partial load to store
* forwarding cases (a store only has part of a load's data) that requires
* the load to wait until the store writes back. In the former case it
* holds onto the instruction until the dependence unit looks at it, and
* in the latter it stalls the LSQ until the store writes back. At that
* point the load is replayed.
*/
template <class Impl>
class OzoneLWLSQ {
public:
typedef typename Impl::Params Params;
typedef typename Impl::FullCPU FullCPU;
typedef typename Impl::BackEnd BackEnd;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::IssueStruct IssueStruct;
typedef TheISA::IntReg IntReg;
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator LdMapIt;
private:
class StoreCompletionEvent : public Event {
public:
/** Constructs a store completion event. */
StoreCompletionEvent(DynInstPtr &inst, BackEnd *be,
Event *wb_event, OzoneLWLSQ *lsq_ptr);
/** Processes the store completion event. */
void process();
/** Returns the description of this event. */
const char *description();
private:
/** The store index of the store being written back. */
DynInstPtr inst;
BackEnd *be;
/** The writeback event for the store. Needed for store
* conditionals.
*/
Event *wbEvent;
/** The pointer to the LSQ unit that issued the store. */
OzoneLWLSQ<Impl> *lsqPtr;
};
public:
/** Constructs an LSQ unit. init() must be called prior to use. */
OzoneLWLSQ();
/** Initializes the LSQ unit with the specified number of entries. */
void init(Params *params, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id);
/** Returns the name of the LSQ unit. */
std::string name() const;
/** Sets the CPU pointer. */
void setCPU(FullCPU *cpu_ptr)
{ cpu = cpu_ptr; }
/** Sets the back-end stage pointer. */
void setBE(BackEnd *be_ptr)
{ be = be_ptr; }
/** Sets the page table pointer. */
void setPageTable(PageTable *pt_ptr);
/** Ticks the LSQ unit, which in this case only resets the number of
* used cache ports.
* @todo: Move the number of used ports up to the LSQ level so it can
* be shared by all LSQ units.
*/
void tick() { usedPorts = 0; }
/** Inserts an instruction. */
void insert(DynInstPtr &inst);
/** Inserts a load instruction. */
void insertLoad(DynInstPtr &load_inst);
/** Inserts a store instruction. */
void insertStore(DynInstPtr &store_inst);
/** Executes a load instruction. */
Fault executeLoad(DynInstPtr &inst);
// Fault executeLoad(int lq_idx);
/** Executes a store instruction. */
Fault executeStore(DynInstPtr &inst);
/** Commits the head load. */
void commitLoad();
/** Commits loads older than a specific sequence number. */
void commitLoads(InstSeqNum &youngest_inst);
/** Commits stores older than a specific sequence number. */
void commitStores(InstSeqNum &youngest_inst);
/** Writes back stores. */
void writebackStores();
// @todo: Include stats in the LSQ unit.
//void regStats();
/** Clears all the entries in the LQ. */
void clearLQ();
/** Clears all the entries in the SQ. */
void clearSQ();
/** Resizes the LQ to a given size. */
void resizeLQ(unsigned size);
/** Resizes the SQ to a given size. */
void resizeSQ(unsigned size);
/** Squashes all instructions younger than a specific sequence number. */
void squash(const InstSeqNum &squashed_num);
/** Returns if there is a memory ordering violation. Value is reset upon
* call to getMemDepViolator().
*/
bool violation() { return memDepViolator; }
/** Returns the memory ordering violator. */
DynInstPtr getMemDepViolator();
/** Returns if a load became blocked due to the memory system. It clears
* the bool's value upon this being called.
*/
bool loadBlocked()
{ return isLoadBlocked; }
void clearLoadBlocked()
{ isLoadBlocked = false; }
bool isLoadBlockedHandled()
{ return loadBlockedHandled; }
void setLoadBlockedHandled()
{ loadBlockedHandled = true; }
/** Returns the number of free entries (min of free LQ and SQ entries). */
unsigned numFreeEntries();
/** Returns the number of loads ready to execute. */
int numLoadsReady();
/** Returns the number of loads in the LQ. */
int numLoads() { return loads; }
/** Returns the number of stores in the SQ. */
int numStores() { return stores; }
/** Returns if either the LQ or SQ is full. */
bool isFull() { return lqFull() || sqFull(); }
/** Returns if the LQ is full. */
bool lqFull() { return loads >= (LQEntries - 1); }
/** Returns if the SQ is full. */
bool sqFull() { return stores >= (SQEntries - 1); }
/** Debugging function to dump instructions in the LSQ. */
void dumpInsts();
/** Returns the number of instructions in the LSQ. */
unsigned getCount() { return loads + stores; }
/** Returns if there are any stores to writeback. */
bool hasStoresToWB() { return storesToWB; }
/** Returns the number of stores to writeback. */
int numStoresToWB() { return storesToWB; }
/** Returns if the LSQ unit will writeback on this cycle. */
bool willWB() { return storeQueue.back().canWB &&
!storeQueue.back().completed &&
!dcacheInterface->isBlocked(); }
private:
/** Completes the store at the specified index. */
void completeStore(int store_idx);
private:
/** Pointer to the CPU. */
FullCPU *cpu;
/** Pointer to the back-end stage. */
BackEnd *be;
/** Pointer to the D-cache. */
MemInterface *dcacheInterface;
/** Pointer to the page table. */
PageTable *pTable;
public:
struct SQEntry {
/** Constructs an empty store queue entry. */
SQEntry()
: inst(NULL), req(NULL), size(0), data(0),
canWB(0), committed(0), completed(0), lqIt(NULL)
{ }
/** Constructs a store queue entry for a given instruction. */
SQEntry(DynInstPtr &_inst)
: inst(_inst), req(NULL), size(0), data(0),
canWB(0), committed(0), completed(0), lqIt(NULL)
{ }
/** The store instruction. */
DynInstPtr inst;
/** The memory request for the store. */
MemReqPtr req;
/** The size of the store. */
int size;
/** The store data. */
IntReg data;
/** Whether or not the store can writeback. */
bool canWB;
/** Whether or not the store is committed. */
bool committed;
/** Whether or not the store is completed. */
bool completed;
typename std::list<DynInstPtr>::iterator lqIt;
};
enum Status {
Running,
Idle,
DcacheMissStall,
DcacheMissSwitch
};
private:
/** The OzoneLWLSQ thread id. */
unsigned lsqID;
/** The status of the LSQ unit. */
Status _status;
/** The store queue. */
// std::vector<SQEntry> storeQueue;
std::list<SQEntry> storeQueue;
/** The load queue. */
// std::vector<DynInstPtr> loadQueue;
std::list<DynInstPtr> loadQueue;
typedef typename std::list<SQEntry>::iterator SQIt;
typedef typename std::list<DynInstPtr>::iterator LQIt;
struct HashFn {
size_t operator() (const int a) const
{
unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
return hash;
}
};
m5::hash_map<int, SQIt, HashFn> SQItHash;
std::queue<int> SQIndices;
m5::hash_map<int, LQIt, HashFn> LQItHash;
std::queue<int> LQIndices;
typedef typename m5::hash_map<int, LQIt, HashFn>::iterator LQHashIt;
typedef typename m5::hash_map<int, SQIt, HashFn>::iterator SQHashIt;
// Consider making these 16 bits
/** The number of LQ entries. */
unsigned LQEntries;
/** The number of SQ entries. */
unsigned SQEntries;
/** The number of load instructions in the LQ. */
int loads;
/** The number of store instructions in the SQ (excludes those waiting to
* writeback).
*/
int stores;
int storesToWB;
/// @todo Consider moving to a more advanced model with write vs read ports
/** The number of cache ports available each cycle. */
int cachePorts;
/** The number of used cache ports in this cycle. */
int usedPorts;
//list<InstSeqNum> mshrSeqNums;
//Stats::Scalar<> dcacheStallCycles;
Counter lastDcacheStall;
// Make these per thread?
/** Whether or not the LSQ is stalled. */
bool stalled;
/** The store that causes the stall due to partial store to load
* forwarding.
*/
InstSeqNum stallingStoreIsn;
/** The index of the above store. */
// int stallingLoadIdx;
LQIt stallingLoad;
/** Whether or not a load is blocked due to the memory system. It is
* cleared when this value is checked via loadBlocked().
*/
bool isLoadBlocked;
bool loadBlockedHandled;
InstSeqNum blockedLoadSeqNum;
/** The oldest faulting load instruction. */
DynInstPtr loadFaultInst;
/** The oldest faulting store instruction. */
DynInstPtr storeFaultInst;
/** The oldest load that caused a memory ordering violation. */
DynInstPtr memDepViolator;
// Will also need how many read/write ports the Dcache has. Or keep track
// of that in stage that is one level up, and only call executeLoad/Store
// the appropriate number of times.
public:
/** Executes the load at the given index. */
template <class T>
Fault read(MemReqPtr &req, T &data, int load_idx);
/** Executes the store at the given index. */
template <class T>
Fault write(MemReqPtr &req, T &data, int store_idx);
/** Returns the index of the head load instruction. */
// int getLoadHead() { return loadHead; }
/** Returns the sequence number of the head load instruction. */
InstSeqNum getLoadHeadSeqNum()
{
if (!loadQueue.empty()) {
return loadQueue.back()->seqNum;
} else {
return 0;
}
}
/** Returns the index of the head store instruction. */
// int getStoreHead() { return storeHead; }
/** Returns the sequence number of the head store instruction. */
InstSeqNum getStoreHeadSeqNum()
{
if (!storeQueue.empty()) {
return storeQueue.back().inst->seqNum;
} else {
return 0;
}
}
/** Returns whether or not the LSQ unit is stalled. */
bool isStalled() { return stalled; }
};
template <class Impl>
template <class T>
Fault
OzoneLWLSQ<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
//Depending on issue2execute delay a squashed load could
//execute if it is found to be squashed in the same
//cycle it is scheduled to execute
typename m5::hash_map<int, LQIt, HashFn>::iterator
lq_hash_it = LQItHash.find(load_idx);
assert(lq_hash_it != LQItHash.end());
DynInstPtr inst = (*(*lq_hash_it).second);
if (inst->isExecuted()) {
panic("Should not reach this point with split ops!");
memcpy(&data,req->data,req->size);
return NoFault;
}
// Make sure this isn't an uncacheable access
// A bit of a hackish way to get uncached accesses to work only if they're
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
// @todo: Fix uncached accesses.
if (req->flags & UNCACHEABLE &&
(inst != loadQueue.back() || !inst->reachedCommit)) {
DPRINTF(OzoneLSQ, "[sn:%lli] Uncached load and not head of "
"commit/LSQ!\n",
inst->seqNum);
be->rescheduleMemInst(inst);
return TheISA::genMachineCheckFault();
}
// Check the SQ for any previous stores that might lead to forwarding
SQIt sq_it = storeQueue.begin();
int store_size = 0;
DPRINTF(OzoneLSQ, "Read called, load idx: %i addr: %#x\n",
load_idx, req->paddr);
while (sq_it != storeQueue.end() && (*sq_it).inst->seqNum > inst->seqNum)
++sq_it;
while (1) {
// End once we've reached the top of the LSQ
if (sq_it == storeQueue.end()) {
break;
}
assert((*sq_it).inst);
store_size = (*sq_it).size;
if (store_size == 0) {
sq_it++;
continue;
}
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
bool store_has_lower_limit =
req->vaddr >= (*sq_it).inst->effAddr;
bool store_has_upper_limit =
(req->vaddr + req->size) <= ((*sq_it).inst->effAddr +
store_size);
bool lower_load_has_store_part =
req->vaddr < ((*sq_it).inst->effAddr +
store_size);
bool upper_load_has_store_part =
(req->vaddr + req->size) > (*sq_it).inst->effAddr;
// If the store's data has all of the data needed, we can forward.
if (store_has_lower_limit && store_has_upper_limit) {
int shift_amt = req->vaddr & (store_size - 1);
// Assumes byte addressing
shift_amt = shift_amt << 3;
// Cast this to type T?
data = (*sq_it).data >> shift_amt;
req->cmd = Read;
assert(!req->completionEvent);
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
memcpy(req->data, &data, req->size);
DPRINTF(OzoneLSQ, "Forwarding from store [sn:%lli] to load to "
"[sn:%lli] addr %#x, data %#x\n",
(*sq_it).inst->seqNum, inst->seqNum, req->vaddr, *(req->data));
typename BackEnd::LdWritebackEvent *wb =
new typename BackEnd::LdWritebackEvent(inst,
be);
// We'll say this has a 1 cycle load-store forwarding latency
// for now.
// FIXME - Need to make this a parameter.
wb->schedule(curTick);
// Should keep track of stat for forwarded data
return NoFault;
} else if ((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part)) {
// This is the partial store-load forwarding case where a store
// has only part of the load's data.
// If it's already been written back, then don't worry about
// stalling on it.
if ((*sq_it).completed) {
sq_it++;
break;
}
// Must stall load and force it to retry, so long as it's the oldest
// load that needs to do so.
if (!stalled ||
(stalled &&
inst->seqNum <
(*stallingLoad)->seqNum)) {
stalled = true;
stallingStoreIsn = (*sq_it).inst->seqNum;
stallingLoad = (*lq_hash_it).second;
}
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
be->rescheduleMemInst(inst);
DPRINTF(OzoneLSQ, "Load-store forwarding mis-match. "
"Store [sn:%lli] to load addr %#x\n",
(*sq_it).inst->seqNum, req->vaddr);
return NoFault;
}
sq_it++;
}
// If there's no forwarding case, then go access memory
++usedPorts;
// if we have a cache, do cache access too
if (dcacheInterface) {
if (dcacheInterface->isBlocked()) {
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < inst->seqNum)
return NoFault;
isLoadBlocked = true;
loadBlockedHandled = false;
blockedLoadSeqNum = inst->seqNum;
// No fault occurred, even though the interface is blocked.
return NoFault;
}
DPRINTF(OzoneLSQ, "D-cache: PC:%#x reading from paddr:%#x "
"vaddr:%#x flags:%i\n",
inst->readPC(), req->paddr, req->vaddr, req->flags);
// Setup MemReq pointer
req->cmd = Read;
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
assert(!req->completionEvent);
req->completionEvent =
new typename BackEnd::LdWritebackEvent(inst, be);
// Do Cache Access
MemAccessResult result = dcacheInterface->access(req);
// Ugly hack to get an event scheduled *only* if the access is
// a miss. We really should add first-class support for this
// at some point.
// @todo: Probably should support having no events
if (result != MA_HIT) {
DPRINTF(OzoneLSQ, "D-cache miss!\n");
DPRINTF(Activity, "Activity: ld accessing mem miss [sn:%lli]\n",
inst->seqNum);
lastDcacheStall = curTick;
_status = DcacheMissStall;
} else {
// DPRINTF(Activity, "Activity: ld accessing mem hit [sn:%lli]\n",
// inst->seqNum);
DPRINTF(OzoneLSQ, "D-cache hit!\n");
}
} else {
fatal("Must use D-cache with new memory system");
}
return NoFault;
}
template <class Impl>
template <class T>
Fault
OzoneLWLSQ<Impl>::write(MemReqPtr &req, T &data, int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
SQIt sq_it = (*sq_hash_it).second;
assert((*sq_it).inst);
DPRINTF(OzoneLSQ, "Doing write to store idx %i, addr %#x data %#x"
" | [sn:%lli]\n",
store_idx, req->paddr, data, (*sq_it).inst->seqNum);
(*sq_it).req = req;
(*sq_it).size = sizeof(T);
(*sq_it).data = data;
// This function only writes the data to the store queue, so no fault
// can happen here.
return NoFault;
}
#endif // __CPU_OZONE_LW_LSQ_HH__

766
cpu/ozone/lw_lsq_impl.hh Normal file
View File

@@ -0,0 +1,766 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "arch/isa_traits.hh"
#include "base/str.hh"
#include "cpu/ozone/lw_lsq.hh"
template <class Impl>
OzoneLWLSQ<Impl>::StoreCompletionEvent::StoreCompletionEvent(DynInstPtr &_inst,
BackEnd *_be,
Event *wb_event,
OzoneLWLSQ<Impl> *lsq_ptr)
: Event(&mainEventQueue),
inst(_inst),
be(_be),
wbEvent(wb_event),
lsqPtr(lsq_ptr)
{
this->setFlags(Event::AutoDelete);
}
template <class Impl>
void
OzoneLWLSQ<Impl>::StoreCompletionEvent::process()
{
DPRINTF(OzoneLSQ, "Cache miss complete for store [sn:%lli]\n",
inst->seqNum);
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
// lsqPtr->cpu->wakeCPU();
if (wbEvent) {
wbEvent->process();
delete wbEvent;
}
lsqPtr->completeStore(inst->sqIdx);
be->removeDcacheMiss(inst);
}
template <class Impl>
const char *
OzoneLWLSQ<Impl>::StoreCompletionEvent::description()
{
return "LSQ store completion event";
}
template <class Impl>
OzoneLWLSQ<Impl>::OzoneLWLSQ()
: loads(0), stores(0), storesToWB(0), stalled(false), isLoadBlocked(false),
loadBlockedHandled(false)
{
}
template<class Impl>
void
OzoneLWLSQ<Impl>::init(Params *params, unsigned maxLQEntries,
unsigned maxSQEntries, unsigned id)
{
DPRINTF(OzoneLSQ, "Creating OzoneLWLSQ%i object.\n",id);
lsqID = id;
LQEntries = maxLQEntries;
SQEntries = maxSQEntries;
for (int i = 0; i < LQEntries * 10; i++) {
LQIndices.push(i);
SQIndices.push(i);
}
// May want to initialize these entries to NULL
// loadHead = loadTail = 0;
// storeHead = storeWBIdx = storeTail = 0;
usedPorts = 0;
cachePorts = params->cachePorts;
dcacheInterface = params->dcacheInterface;
loadFaultInst = storeFaultInst = memDepViolator = NULL;
blockedLoadSeqNum = 0;
}
template<class Impl>
std::string
OzoneLWLSQ<Impl>::name() const
{
return "lsqunit";
}
template<class Impl>
void
OzoneLWLSQ<Impl>::clearLQ()
{
loadQueue.clear();
}
template<class Impl>
void
OzoneLWLSQ<Impl>::clearSQ()
{
storeQueue.clear();
}
template<class Impl>
void
OzoneLWLSQ<Impl>::setPageTable(PageTable *pt_ptr)
{
DPRINTF(OzoneLSQ, "Setting the page table pointer.\n");
pTable = pt_ptr;
}
template<class Impl>
void
OzoneLWLSQ<Impl>::resizeLQ(unsigned size)
{
assert( size >= LQEntries);
if (size > LQEntries) {
while (size > loadQueue.size()) {
DynInstPtr dummy;
loadQueue.push_back(dummy);
LQEntries++;
}
} else {
LQEntries = size;
}
}
template<class Impl>
void
OzoneLWLSQ<Impl>::resizeSQ(unsigned size)
{
if (size > SQEntries) {
while (size > storeQueue.size()) {
SQEntry dummy;
storeQueue.push_back(dummy);
SQEntries++;
}
} else {
SQEntries = size;
}
}
template <class Impl>
void
OzoneLWLSQ<Impl>::insert(DynInstPtr &inst)
{
// Make sure we really have a memory reference.
assert(inst->isMemRef());
// Make sure it's one of the two classes of memory references.
assert(inst->isLoad() || inst->isStore());
if (inst->isLoad()) {
insertLoad(inst);
} else {
insertStore(inst);
}
// inst->setInLSQ();
}
template <class Impl>
void
OzoneLWLSQ<Impl>::insertLoad(DynInstPtr &load_inst)
{
assert(!LQIndices.empty());
int load_index = LQIndices.front();
LQIndices.pop();
DPRINTF(OzoneLSQ, "Inserting load PC %#x, idx:%i [sn:%lli]\n",
load_inst->readPC(), load_index, load_inst->seqNum);
load_inst->lqIdx = load_index;
loadQueue.push_front(load_inst);
LQItHash[load_index] = loadQueue.begin();
++loads;
}
template <class Impl>
void
OzoneLWLSQ<Impl>::insertStore(DynInstPtr &store_inst)
{
// Make sure it is not full before inserting an instruction.
assert(stores - storesToWB < SQEntries);
assert(!SQIndices.empty());
int store_index = SQIndices.front();
SQIndices.pop();
DPRINTF(OzoneLSQ, "Inserting store PC %#x, idx:%i [sn:%lli]\n",
store_inst->readPC(), store_index, store_inst->seqNum);
store_inst->sqIdx = store_index;
SQEntry entry(store_inst);
if (loadQueue.empty()) {
entry.lqIt = loadQueue.end();
} else {
entry.lqIt = loadQueue.begin();
}
storeQueue.push_front(entry);
SQItHash[store_index] = storeQueue.begin();
++stores;
}
template <class Impl>
typename Impl::DynInstPtr
OzoneLWLSQ<Impl>::getMemDepViolator()
{
DynInstPtr temp = memDepViolator;
memDepViolator = NULL;
return temp;
}
template <class Impl>
unsigned
OzoneLWLSQ<Impl>::numFreeEntries()
{
unsigned free_lq_entries = LQEntries - loads;
unsigned free_sq_entries = SQEntries - stores;
// Both the LQ and SQ entries have an extra dummy entry to differentiate
// empty/full conditions. Subtract 1 from the free entries.
if (free_lq_entries < free_sq_entries) {
return free_lq_entries - 1;
} else {
return free_sq_entries - 1;
}
}
template <class Impl>
int
OzoneLWLSQ<Impl>::numLoadsReady()
{
int retval = 0;
LQIt lq_it = loadQueue.begin();
LQIt end_it = loadQueue.end();
while (lq_it != end_it) {
if ((*lq_it)->readyToIssue()) {
++retval;
}
}
return retval;
}
template <class Impl>
Fault
OzoneLWLSQ<Impl>::executeLoad(DynInstPtr &inst)
{
// Execute a specific load.
Fault load_fault = NoFault;
DPRINTF(OzoneLSQ, "Executing load PC %#x, [sn:%lli]\n",
inst->readPC(),inst->seqNum);
// Make sure it's really in the list.
// Normally it should always be in the list. However,
/* due to a syscall it may not be the list.
#ifdef DEBUG
int i = loadHead;
while (1) {
if (i == loadTail && !find(inst)) {
assert(0 && "Load not in the queue!");
} else if (loadQueue[i] == inst) {
break;
}
i = i + 1;
if (i >= LQEntries) {
i = 0;
}
}
#endif // DEBUG*/
load_fault = inst->initiateAcc();
// Might want to make sure that I'm not overwriting a previously faulting
// instruction that hasn't been checked yet.
// Actually probably want the oldest faulting load
if (load_fault != NoFault) {
DPRINTF(OzoneLSQ, "Load [sn:%lli] has a fault\n", inst->seqNum);
// Maybe just set it as can commit here, although that might cause
// some other problems with sending traps to the ROB too quickly.
be->instToCommit(inst);
// iewStage->activityThisCycle();
}
return load_fault;
}
template <class Impl>
Fault
OzoneLWLSQ<Impl>::executeStore(DynInstPtr &store_inst)
{
// Make sure that a store exists.
assert(stores != 0);
int store_idx = store_inst->sqIdx;
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
DPRINTF(OzoneLSQ, "Executing store PC %#x [sn:%lli]\n",
store_inst->readPC(), store_inst->seqNum);
SQIt sq_it = (*sq_hash_it).second;
Fault store_fault = store_inst->initiateAcc();
// Store size should now be available. Use it to get proper offset for
// addr comparisons.
int size = (*sq_it).size;
if (size == 0) {
DPRINTF(OzoneLSQ,"Fault on Store PC %#x, [sn:%lli],Size = 0\n",
store_inst->readPC(),store_inst->seqNum);
return store_fault;
}
assert(store_fault == NoFault);
if (!storeFaultInst) {
if (store_fault != NoFault) {
panic("Fault in a store instruction!");
storeFaultInst = store_inst;
} else if (store_inst->isNonSpeculative()) {
// Nonspeculative accesses (namely store conditionals)
// need to set themselves as able to writeback if we
// haven't had a fault by here.
(*sq_it).canWB = true;
++storesToWB;
DPRINTF(OzoneLSQ, "Nonspeculative store! storesToWB:%i\n",
storesToWB);
}
}
LQIt lq_it = --(loadQueue.end());
if (!memDepViolator) {
while (lq_it != loadQueue.end()) {
if ((*lq_it)->seqNum < store_inst->seqNum) {
lq_it--;
continue;
}
// Actually should only check loads that have actually executed
// Might be safe because effAddr is set to InvalAddr when the
// dyn inst is created.
// Must actually check all addrs in the proper size range
// Which is more correct than needs to be. What if for now we just
// assume all loads are quad-word loads, and do the addr based
// on that.
// @todo: Fix this, magic number being used here
if (((*lq_it)->effAddr >> 8) ==
(store_inst->effAddr >> 8)) {
// A load incorrectly passed this store. Squash and refetch.
// For now return a fault to show that it was unsuccessful.
memDepViolator = (*lq_it);
return TheISA::genMachineCheckFault();
}
lq_it--;
}
// If we've reached this point, there was no violation.
memDepViolator = NULL;
}
return store_fault;
}
template <class Impl>
void
OzoneLWLSQ<Impl>::commitLoad()
{
assert(!loadQueue.empty());
DPRINTF(OzoneLSQ, "[sn:%lli] Committing head load instruction, PC %#x\n",
loadQueue.back()->seqNum, loadQueue.back()->readPC());
LQIndices.push(loadQueue.back()->lqIdx);
LQItHash.erase(loadQueue.back()->lqIdx);
loadQueue.pop_back();
--loads;
}
template <class Impl>
void
OzoneLWLSQ<Impl>::commitLoads(InstSeqNum &youngest_inst)
{
assert(loads == 0 || !loadQueue.empty());
while (loads != 0 &&
loadQueue.back()->seqNum <= youngest_inst) {
commitLoad();
}
}
template <class Impl>
void
OzoneLWLSQ<Impl>::commitStores(InstSeqNum &youngest_inst)
{
assert(stores == 0 || !storeQueue.empty());
SQIt sq_it = --(storeQueue.end());
while (!storeQueue.empty() && sq_it != storeQueue.end()) {
assert((*sq_it).inst);
if (!(*sq_it).canWB) {
if ((*sq_it).inst->seqNum > youngest_inst) {
break;
}
++storesToWB;
DPRINTF(OzoneLSQ, "Marking store as able to write back, PC "
"%#x [sn:%lli], storesToWB:%i\n",
(*sq_it).inst->readPC(),
(*sq_it).inst->seqNum,
storesToWB);
(*sq_it).canWB = true;
}
sq_it--;
}
}
template <class Impl>
void
OzoneLWLSQ<Impl>::writebackStores()
{
SQIt sq_it = --(storeQueue.end());
while (storesToWB > 0 &&
sq_it != storeQueue.end() &&
(*sq_it).inst &&
(*sq_it).canWB &&
usedPorts < cachePorts) {
DynInstPtr inst = (*sq_it).inst;
if ((*sq_it).size == 0 && !(*sq_it).completed) {
sq_it--;
completeStore(inst->sqIdx);
continue;
}
if (inst->isDataPrefetch() || (*sq_it).committed) {
sq_it--;
continue;
}
if (dcacheInterface && dcacheInterface->isBlocked()) {
DPRINTF(OzoneLSQ, "Unable to write back any more stores, cache"
" is blocked!\n");
break;
}
++usedPorts;
assert((*sq_it).req);
assert(!(*sq_it).committed);
MemReqPtr req = (*sq_it).req;
(*sq_it).committed = true;
req->cmd = Write;
req->completionEvent = NULL;
req->time = curTick;
assert(!req->data);
req->data = new uint8_t[64];
memcpy(req->data, (uint8_t *)&(*sq_it).data, req->size);
DPRINTF(OzoneLSQ, "D-Cache: Writing back store idx:%i PC:%#x "
"to Addr:%#x, data:%#x [sn:%lli]\n",
inst->sqIdx,inst->readPC(),
req->paddr, *(req->data),
inst->seqNum);
if (dcacheInterface) {
MemAccessResult result = dcacheInterface->access(req);
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
"load [sn:%lli]\n",
stallingStoreIsn, (*stallingLoad)->seqNum);
stalled = false;
stallingStoreIsn = 0;
be->replayMemInst((*stallingLoad));
}
if (result != MA_HIT && dcacheInterface->doEvents()) {
// Event *wb = NULL;
typename BackEnd::LdWritebackEvent *wb = NULL;
if (req->flags & LOCKED) {
// Stx_C does not generate a system port transaction.
req->result=1;
wb = new typename BackEnd::LdWritebackEvent(inst,
be);
}
DPRINTF(OzoneLSQ,"D-Cache Write Miss!\n");
// DPRINTF(Activity, "Active st accessing mem miss [sn:%lli]\n",
// inst->seqNum);
// Will stores need their own kind of writeback events?
// Do stores even need writeback events?
assert(!req->completionEvent);
req->completionEvent = new
StoreCompletionEvent(inst, be, wb, this);
be->addDcacheMiss(inst);
lastDcacheStall = curTick;
_status = DcacheMissStall;
// Increment stat here or something
sq_it--;
} else {
DPRINTF(OzoneLSQ,"D-Cache: Write Hit on idx:%i !\n",
inst->sqIdx);
// DPRINTF(Activity, "Active st accessing mem hit [sn:%lli]\n",
// inst->seqNum);
if (req->flags & LOCKED) {
// Stx_C does not generate a system port transaction.
if (req->flags & UNCACHEABLE) {
req->result = 2;
} else {
req->result = 1;
}
typename BackEnd::LdWritebackEvent *wb =
new typename BackEnd::LdWritebackEvent(inst,
be);
wb->schedule(curTick);
}
sq_it--;
completeStore(inst->sqIdx);
}
} else {
panic("Must HAVE DCACHE!!!!!\n");
}
}
// Not sure this should set it to 0.
usedPorts = 0;
assert(stores >= 0 && storesToWB >= 0);
}
template <class Impl>
void
OzoneLWLSQ<Impl>::squash(const InstSeqNum &squashed_num)
{
DPRINTF(OzoneLSQ, "Squashing until [sn:%lli]!"
"(Loads:%i Stores:%i)\n",squashed_num,loads,stores);
LQIt lq_it = loadQueue.begin();
while (loads != 0 && (*lq_it)->seqNum > squashed_num) {
assert(!loadQueue.empty());
// Clear the smart pointer to make sure it is decremented.
DPRINTF(OzoneLSQ,"Load Instruction PC %#x squashed, "
"[sn:%lli]\n",
(*lq_it)->readPC(),
(*lq_it)->seqNum);
if (isStalled() && lq_it == stallingLoad) {
stalled = false;
stallingStoreIsn = 0;
stallingLoad = NULL;
}
--loads;
// Inefficient!
LQHashIt lq_hash_it = LQItHash.find((*lq_it)->lqIdx);
assert(lq_hash_it != LQItHash.end());
LQItHash.erase(lq_hash_it);
LQIndices.push((*lq_it)->lqIdx);
loadQueue.erase(lq_it++);
}
if (isLoadBlocked) {
if (squashed_num < blockedLoadSeqNum) {
isLoadBlocked = false;
loadBlockedHandled = false;
blockedLoadSeqNum = 0;
}
}
SQIt sq_it = storeQueue.begin();
while (stores != 0 && (*sq_it).inst->seqNum > squashed_num) {
assert(!storeQueue.empty());
// Clear the smart pointer to make sure it is decremented.
DPRINTF(OzoneLSQ,"Store Instruction PC %#x idx:%i squashed [sn:%lli]\n",
(*sq_it).inst->readPC(), (*sq_it).inst->sqIdx,
(*sq_it).inst->seqNum);
// I don't think this can happen. It should have been cleared by the
// stalling load.
if (isStalled() &&
(*sq_it).inst->seqNum == stallingStoreIsn) {
panic("Is stalled should have been cleared by stalling load!\n");
stalled = false;
stallingStoreIsn = 0;
}
SQHashIt sq_hash_it = SQItHash.find((*sq_it).inst->sqIdx);
assert(sq_hash_it != SQItHash.end());
SQItHash.erase(sq_hash_it);
SQIndices.push((*sq_it).inst->sqIdx);
(*sq_it).inst = NULL;
(*sq_it).canWB = 0;
if ((*sq_it).req) {
assert(!(*sq_it).req->completionEvent);
}
(*sq_it).req = NULL;
--stores;
storeQueue.erase(sq_it++);
}
}
template <class Impl>
void
OzoneLWLSQ<Impl>::dumpInsts()
{
cprintf("Load store queue: Dumping instructions.\n");
cprintf("Load queue size: %i\n", loads);
cprintf("Load queue: ");
LQIt lq_it = --(loadQueue.end());
while (lq_it != loadQueue.end() && (*lq_it)) {
cprintf("[sn:%lli] %#x ", (*lq_it)->seqNum,
(*lq_it)->readPC());
lq_it--;
}
cprintf("\nStore queue size: %i\n", stores);
cprintf("Store queue: ");
SQIt sq_it = --(storeQueue.end());
while (sq_it != storeQueue.end() && (*sq_it).inst) {
cprintf("[sn:%lli]\nPC:%#x\nSize:%i\nCommitted:%i\nCompleted:%i\ncanWB:%i\n",
(*sq_it).inst->seqNum,
(*sq_it).inst->readPC(),
(*sq_it).size,
(*sq_it).committed,
(*sq_it).completed,
(*sq_it).canWB);
sq_it--;
}
cprintf("\n");
}
template <class Impl>
void
OzoneLWLSQ<Impl>::completeStore(int store_idx)
{
SQHashIt sq_hash_it = SQItHash.find(store_idx);
assert(sq_hash_it != SQItHash.end());
SQIt sq_it = (*sq_hash_it).second;
assert((*sq_it).inst);
(*sq_it).completed = true;
DynInstPtr inst = (*sq_it).inst;
--storesToWB;
if (isStalled() &&
inst->seqNum == stallingStoreIsn) {
DPRINTF(OzoneLSQ, "Unstalling, stalling store [sn:%lli] "
"load [sn:%lli]\n",
stallingStoreIsn, (*stallingLoad)->seqNum);
stalled = false;
stallingStoreIsn = 0;
be->replayMemInst((*stallingLoad));
}
DPRINTF(OzoneLSQ, "Completing store idx:%i [sn:%lli], storesToWB:%i\n",
inst->sqIdx, inst->seqNum, storesToWB);
// A bit conservative because a store completion may not free up entries,
// but hopefully avoids two store completions in one cycle from making
// the CPU tick twice.
// cpu->activityThisCycle();
assert(!storeQueue.empty());
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(sq_it);
--stores;
/*
SQIt oldest_store_it = --(storeQueue.end());
if (sq_it == oldest_store_it) {
do {
inst = (*oldest_store_it).inst;
sq_hash_it = SQItHash.find(inst->sqIdx);
assert(sq_hash_it != SQItHash.end());
SQItHash.erase(sq_hash_it);
SQIndices.push(inst->sqIdx);
storeQueue.erase(oldest_store_it--);
--stores;
} while ((*oldest_store_it).completed &&
oldest_store_it != storeQueue.end());
// be->updateLSQNextCycle = true;
}
*/
}

View File

@@ -35,6 +35,8 @@
#include "cpu/ozone/front_end.hh"
#include "cpu/ozone/inst_queue.hh"
#include "cpu/ozone/lsq_unit.hh"
#include "cpu/ozone/lw_lsq.hh"
#include "cpu/ozone/lw_back_end.hh"
#include "cpu/ozone/null_predictor.hh"
#include "cpu/ozone/dyn_inst.hh"
#include "cpu/ozone/simple_params.hh"
@@ -55,10 +57,10 @@ struct OzoneImpl {
typedef TwobitBPredUnit<OzoneImpl> BranchPred;
typedef FrontEnd<OzoneImpl> FrontEnd;
// Will need IQ, LSQ eventually
typedef BackEnd<OzoneImpl> BackEnd;
typedef LWBackEnd<OzoneImpl> BackEnd;
typedef InstQueue<OzoneImpl> InstQueue;
typedef OzoneLSQ<OzoneImpl> LdstQueue;
typedef OzoneLWLSQ<OzoneImpl> LdstQueue;
typedef OzoneDynInst<OzoneImpl> DynInst;
typedef RefCountingPtr<DynInst> DynInstPtr;

View File

@@ -40,12 +40,17 @@
// forward declarations
struct AlphaSimpleImpl;
struct OzoneImpl;
struct SimpleImpl;
class ExecContext;
class DynInst;
template <class Impl>
class AlphaDynInst;
template <class Impl>
class OzoneDynInst;
class FastCPU;
class SimpleCPU;
class InorderCPU;
@@ -121,6 +126,7 @@ class StaticInstBase : public RefCounted
IsWriteBarrier, ///< Is a write barrier
IsNonSpeculative, ///< Should not be executed speculatively
IsQuiesce,
NumFlags
};
@@ -208,6 +214,7 @@ class StaticInstBase : public RefCounted
bool isMemBarrier() const { return flags[IsMemBarrier]; }
bool isWriteBarrier() const { return flags[IsWriteBarrier]; }
bool isNonSpeculative() const { return flags[IsNonSpeculative]; }
bool isQuiesce() const { return flags[IsQuiesce]; }
//@}
/// Operation class. Used to select appropriate function unit in issue.