cpu: De-templatize the O3 DefaultDecode.
Change-Id: If2cef59654db4f5ff8e7cb73d1951895e5e12c9d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42110 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Gabe Black <gabe.black@gmail.com> Maintainer: Gabe Black <gabe.black@gmail.com>
This commit is contained in:
@@ -494,7 +494,7 @@ class FullO3CPU : public BaseO3CPU
|
||||
DefaultFetch fetch;
|
||||
|
||||
/** The decode stage. */
|
||||
DefaultDecode<Impl> decode;
|
||||
DefaultDecode decode;
|
||||
|
||||
/** The dispatch stage. */
|
||||
DefaultRename<Impl> rename;
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* Copyright (c) 2012, 2014 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -26,7 +38,700 @@
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu/o3/decode_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
|
||||
template class DefaultDecode<O3CPUImpl>;
|
||||
#include "arch/types.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "debug/Activity.hh"
|
||||
#include "debug/Decode.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/full_system.hh"
|
||||
|
||||
// clang complains about std::set being overloaded with Packet::set if
|
||||
// we open up the entire namespace std
|
||||
using std::list;
|
||||
|
||||
DefaultDecode::DefaultDecode(FullO3CPU<O3CPUImpl> *_cpu,
|
||||
const DerivO3CPUParams ¶ms)
|
||||
: cpu(_cpu),
|
||||
renameToDecodeDelay(params.renameToDecodeDelay),
|
||||
iewToDecodeDelay(params.iewToDecodeDelay),
|
||||
commitToDecodeDelay(params.commitToDecodeDelay),
|
||||
fetchToDecodeDelay(params.fetchToDecodeDelay),
|
||||
decodeWidth(params.decodeWidth),
|
||||
numThreads(params.numThreads),
|
||||
stats(_cpu)
|
||||
{
|
||||
if (decodeWidth > O3MaxWidth)
|
||||
fatal("decodeWidth (%d) is larger than compiled limit (%d),\n"
|
||||
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
|
||||
decodeWidth, static_cast<int>(O3MaxWidth));
|
||||
|
||||
// @todo: Make into a parameter
|
||||
skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth;
|
||||
for (int tid = 0; tid < O3MaxThreads; tid++) {
|
||||
stalls[tid] = {false};
|
||||
decodeStatus[tid] = Idle;
|
||||
bdelayDoneSeqNum[tid] = 0;
|
||||
squashInst[tid] = nullptr;
|
||||
squashAfterDelaySlot[tid] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::startupStage()
|
||||
{
|
||||
resetStage();
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::clearStates(ThreadID tid)
|
||||
{
|
||||
decodeStatus[tid] = Idle;
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::resetStage()
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
// Setup status, make sure stall signals are clear.
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
decodeStatus[tid] = Idle;
|
||||
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
DefaultDecode::name() const
|
||||
{
|
||||
return cpu->name() + ".decode";
|
||||
}
|
||||
|
||||
DefaultDecode::DecodeStats::DecodeStats(FullO3CPU<O3CPUImpl> *cpu)
|
||||
: Stats::Group(cpu, "decode"),
|
||||
ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is idle"),
|
||||
ADD_STAT(blockedCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is blocked"),
|
||||
ADD_STAT(runCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is running"),
|
||||
ADD_STAT(unblockCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is unblocking"),
|
||||
ADD_STAT(squashCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is squashing"),
|
||||
ADD_STAT(branchResolved, Stats::Units::Count::get(),
|
||||
"Number of times decode resolved a branch"),
|
||||
ADD_STAT(branchMispred, Stats::Units::Count::get(),
|
||||
"Number of times decode detected a branch misprediction"),
|
||||
ADD_STAT(controlMispred, Stats::Units::Count::get(),
|
||||
"Number of times decode detected an instruction incorrectly "
|
||||
"predicted as a control"),
|
||||
ADD_STAT(decodedInsts, Stats::Units::Count::get(),
|
||||
"Number of instructions handled by decode"),
|
||||
ADD_STAT(squashedInsts, Stats::Units::Count::get(),
|
||||
"Number of squashed instructions handled by decode")
|
||||
{
|
||||
idleCycles.prereq(idleCycles);
|
||||
blockedCycles.prereq(blockedCycles);
|
||||
runCycles.prereq(runCycles);
|
||||
unblockCycles.prereq(unblockCycles);
|
||||
squashCycles.prereq(squashCycles);
|
||||
branchResolved.prereq(branchResolved);
|
||||
branchMispred.prereq(branchMispred);
|
||||
controlMispred.prereq(controlMispred);
|
||||
decodedInsts.prereq(decodedInsts);
|
||||
squashedInsts.prereq(squashedInsts);
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
|
||||
{
|
||||
timeBuffer = tb_ptr;
|
||||
|
||||
// Setup wire to write information back to fetch.
|
||||
toFetch = timeBuffer->getWire(0);
|
||||
|
||||
// Create wires to get information from proper places in time buffer.
|
||||
fromRename = timeBuffer->getWire(-renameToDecodeDelay);
|
||||
fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
|
||||
fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
|
||||
{
|
||||
decodeQueue = dq_ptr;
|
||||
|
||||
// Setup wire to write information to proper place in decode queue.
|
||||
toRename = decodeQueue->getWire(0);
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
|
||||
{
|
||||
fetchQueue = fq_ptr;
|
||||
|
||||
// Setup wire to read information from fetch queue.
|
||||
fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::setActiveThreads(std::list<ThreadID> *at_ptr)
|
||||
{
|
||||
activeThreads = at_ptr;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::drainSanityCheck() const
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
assert(insts[tid].empty());
|
||||
assert(skidBuffer[tid].empty());
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::isDrained() const
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
if (!insts[tid].empty() || !skidBuffer[tid].empty() ||
|
||||
(decodeStatus[tid] != Running && decodeStatus[tid] != Idle))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::checkStall(ThreadID tid) const
|
||||
{
|
||||
bool ret_val = false;
|
||||
|
||||
if (stalls[tid].rename) {
|
||||
DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid);
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::fetchInstsValid()
|
||||
{
|
||||
return fromFetch->size > 0;
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::block(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] Blocking.\n", tid);
|
||||
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
|
||||
// If the decode status is blocked or unblocking then decode has not yet
|
||||
// signalled fetch to unblock. In that case, there is no need to tell
|
||||
// fetch to block.
|
||||
if (decodeStatus[tid] != Blocked) {
|
||||
// Set the status to Blocked.
|
||||
decodeStatus[tid] = Blocked;
|
||||
|
||||
if (toFetch->decodeUnblock[tid]) {
|
||||
toFetch->decodeUnblock[tid] = false;
|
||||
} else {
|
||||
toFetch->decodeBlock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::unblock(ThreadID tid)
|
||||
{
|
||||
// Decode is done unblocking only if the skid buffer is empty.
|
||||
if (skidBuffer[tid].empty()) {
|
||||
DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid);
|
||||
toFetch->decodeUnblock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
return true;
|
||||
}
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
|
||||
"prediction detected at decode.\n", tid, inst->seqNum);
|
||||
|
||||
// Send back mispredict information.
|
||||
toFetch->decodeInfo[tid].branchMispredict = true;
|
||||
toFetch->decodeInfo[tid].predIncorrect = true;
|
||||
toFetch->decodeInfo[tid].mispredictInst = inst;
|
||||
toFetch->decodeInfo[tid].squash = true;
|
||||
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
||||
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
|
||||
toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching();
|
||||
toFetch->decodeInfo[tid].squashInst = inst;
|
||||
if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) {
|
||||
toFetch->decodeInfo[tid].branchTaken = true;
|
||||
}
|
||||
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
|
||||
// Might have to tell fetch to unblock.
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid &&
|
||||
fromFetch->insts[i]->seqNum > squash_seq_num) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
// Squash instructions up until this one
|
||||
cpu->removeInstsUntil(squash_seq_num, tid);
|
||||
}
|
||||
|
||||
unsigned
|
||||
DefaultDecode::squash(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] Squashing.\n",tid);
|
||||
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
if (FullSystem) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
} else {
|
||||
// In syscall emulation, we can have both a block and a squash due
|
||||
// to a syscall in the same cycle. This would cause both signals
|
||||
// to be high. This shouldn't happen in full system.
|
||||
// @todo: Determine if this still happens.
|
||||
if (toFetch->decodeBlock[tid])
|
||||
toFetch->decodeBlock[tid] = 0;
|
||||
else
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
// Go through incoming instructions from fetch and squash them.
|
||||
unsigned squash_count = 0;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
squash_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
return squash_count;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::skidInsert(ThreadID tid)
|
||||
{
|
||||
O3DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
|
||||
insts[tid].pop();
|
||||
|
||||
assert(tid == inst->threadNumber);
|
||||
|
||||
skidBuffer[tid].push(inst);
|
||||
|
||||
DPRINTF(Decode, "Inserting [tid:%d][sn:%lli] PC: %s into decode "
|
||||
"skidBuffer %i\n", inst->threadNumber, inst->seqNum,
|
||||
inst->pcState(), skidBuffer[tid].size());
|
||||
}
|
||||
|
||||
// @todo: Eventually need to enforce this by not letting a thread
|
||||
// fetch past its skidbuffer
|
||||
assert(skidBuffer[tid].size() <= skidBufferMax);
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::skidsEmpty()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
if (!skidBuffer[tid].empty())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::updateStatus()
|
||||
{
|
||||
bool any_unblocking = false;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (decodeStatus[tid] == Unblocking) {
|
||||
any_unblocking = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode will have activity if it's unblocking.
|
||||
if (any_unblocking) {
|
||||
if (_status == Inactive) {
|
||||
_status = Active;
|
||||
|
||||
DPRINTF(Activity, "Activating stage.\n");
|
||||
|
||||
cpu->activateStage(FullO3CPU<O3CPUImpl>::DecodeIdx);
|
||||
}
|
||||
} else {
|
||||
// If it's not unblocking, then decode will not have any internal
|
||||
// activity. Switch it to inactive.
|
||||
if (_status == Active) {
|
||||
_status = Inactive;
|
||||
DPRINTF(Activity, "Deactivating stage.\n");
|
||||
|
||||
cpu->deactivateStage(FullO3CPU<O3CPUImpl>::DecodeIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::sortInsts()
|
||||
{
|
||||
int insts_from_fetch = fromFetch->size;
|
||||
for (int i = 0; i < insts_from_fetch; ++i) {
|
||||
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::readStallSignals(ThreadID tid)
|
||||
{
|
||||
if (fromRename->renameBlock[tid]) {
|
||||
stalls[tid].rename = true;
|
||||
}
|
||||
|
||||
if (fromRename->renameUnblock[tid]) {
|
||||
assert(stalls[tid].rename);
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
DefaultDecode::checkSignalsAndUpdate(ThreadID tid)
|
||||
{
|
||||
// Check if there's a squash signal, squash if there is.
|
||||
// Check stall signals, block if necessary.
|
||||
// If status was blocked
|
||||
// Check if stall conditions have passed
|
||||
// if so then go to unblocking
|
||||
// If status was Squashing
|
||||
// check if squashing is not high. Switch to running this cycle.
|
||||
|
||||
// Update the per thread stall statuses.
|
||||
readStallSignals(tid);
|
||||
|
||||
// Check squash signals from commit.
|
||||
if (fromCommit->commitInfo[tid].squash) {
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash "
|
||||
"from commit.\n", tid);
|
||||
|
||||
squash(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (checkStall(tid)) {
|
||||
return block(tid);
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Unblocking;
|
||||
|
||||
unblock(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Squashing) {
|
||||
// Switch status to running if decode isn't being told to block or
|
||||
// squash this cycle.
|
||||
DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we've reached this point, we have not gotten any signals that
|
||||
// cause decode to change its status. Decode remains the same as before.
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::tick()
|
||||
{
|
||||
wroteToTimeBuffer = false;
|
||||
|
||||
bool status_change = false;
|
||||
|
||||
toRenameIndex = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
sortInsts();
|
||||
|
||||
//Check stall and squash signals.
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
DPRINTF(Decode,"Processing [tid:%i]\n",tid);
|
||||
status_change = checkSignalsAndUpdate(tid) || status_change;
|
||||
|
||||
decode(status_change, tid);
|
||||
}
|
||||
|
||||
if (status_change) {
|
||||
updateStatus();
|
||||
}
|
||||
|
||||
if (wroteToTimeBuffer) {
|
||||
DPRINTF(Activity, "Activity this cycle.\n");
|
||||
|
||||
cpu->activityThisCycle();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::decode(bool &status_change, ThreadID tid)
|
||||
{
|
||||
// If status is Running or idle,
|
||||
// call decodeInsts()
|
||||
// If status is Unblocking,
|
||||
// buffer any instructions coming from fetch
|
||||
// continue trying to empty skid buffer
|
||||
// check if stall conditions have passed
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
++stats.blockedCycles;
|
||||
} else if (decodeStatus[tid] == Squashing) {
|
||||
++stats.squashCycles;
|
||||
}
|
||||
|
||||
// Decode should try to decode as many instructions as its bandwidth
|
||||
// will allow, as long as it is not currently blocked.
|
||||
if (decodeStatus[tid] == Running ||
|
||||
decodeStatus[tid] == Idle) {
|
||||
DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run "
|
||||
"stage.\n",tid);
|
||||
|
||||
decodeInsts(tid);
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
// Make sure that the skid buffer has something in it if the
|
||||
// status is unblocking.
|
||||
assert(!skidsEmpty());
|
||||
|
||||
// If the status was unblocking, then instructions from the skid
|
||||
// buffer were used. Remove those instructions and handle
|
||||
// the rest of unblocking.
|
||||
decodeInsts(tid);
|
||||
|
||||
if (fetchInstsValid()) {
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
}
|
||||
|
||||
status_change = unblock(tid) || status_change;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
DefaultDecode::decodeInsts(ThreadID tid)
|
||||
{
|
||||
// Instructions can come either from the skid buffer or the list of
|
||||
// instructions coming from fetch, depending on decode's status.
|
||||
int insts_available = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid].size() : insts[tid].size();
|
||||
|
||||
if (insts_available == 0) {
|
||||
DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out"
|
||||
" early.\n",tid);
|
||||
// Should I change the status to idle?
|
||||
++stats.idleCycles;
|
||||
return;
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid "
|
||||
"buffer.\n",tid);
|
||||
++stats.unblockCycles;
|
||||
} else if (decodeStatus[tid] == Running) {
|
||||
++stats.runCycles;
|
||||
}
|
||||
|
||||
std::queue<O3DynInstPtr>
|
||||
&insts_to_decode = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid] : insts[tid];
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid);
|
||||
|
||||
while (insts_available > 0 && toRenameIndex < decodeWidth) {
|
||||
assert(!insts_to_decode.empty());
|
||||
|
||||
O3DynInstPtr inst = std::move(insts_to_decode.front());
|
||||
|
||||
insts_to_decode.pop();
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with "
|
||||
"PC %s\n", tid, inst->seqNum, inst->pcState());
|
||||
|
||||
if (inst->isSquashed()) {
|
||||
DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is "
|
||||
"squashed, skipping.\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
|
||||
++stats.squashedInsts;
|
||||
|
||||
--insts_available;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Also check if instructions have no source registers. Mark
|
||||
// them as ready to issue at any time. Not sure if this check
|
||||
// should exist here or at a later stage; however it doesn't matter
|
||||
// too much for function correctness.
|
||||
if (inst->numSrcRegs() == 0) {
|
||||
inst->setCanIssue();
|
||||
}
|
||||
|
||||
// This current instruction is valid, so add it into the decode
|
||||
// queue. The next instruction may not be valid, so check to
|
||||
// see if branches were predicted correctly.
|
||||
toRename->insts[toRenameIndex] = inst;
|
||||
|
||||
++(toRename->size);
|
||||
++toRenameIndex;
|
||||
++stats.decodedInsts;
|
||||
--insts_available;
|
||||
|
||||
#if TRACING_ON
|
||||
if (Debug::O3PipeView) {
|
||||
inst->decodeTick = curTick() - inst->fetchTick;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure that if it was predicted as a branch, it really is a
|
||||
// branch.
|
||||
if (inst->readPredTaken() && !inst->isControl()) {
|
||||
panic("Instruction predicted as a branch!");
|
||||
|
||||
++stats.controlMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Go ahead and compute any PC-relative branches.
|
||||
// This includes direct unconditional control and
|
||||
// direct conditional control that is predicted taken.
|
||||
if (inst->isDirectCtrl() &&
|
||||
(inst->isUncondCtrl() || inst->readPredTaken()))
|
||||
{
|
||||
++stats.branchResolved;
|
||||
|
||||
if (!(inst->branchTarget() == inst->readPredTarg())) {
|
||||
++stats.branchMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
TheISA::PCState target = inst->branchTarget();
|
||||
|
||||
DPRINTF(Decode,
|
||||
"[tid:%i] [sn:%llu] "
|
||||
"Updating predictions: Wrong predicted target: %s \
|
||||
PredPC: %s\n",
|
||||
tid, inst->seqNum, inst->readPredTarg(), target);
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't process all instructions, then we will need to block
|
||||
// and put all those instructions into the skid buffer.
|
||||
if (!insts_to_decode.empty()) {
|
||||
block(tid);
|
||||
}
|
||||
|
||||
// Record that decode has written to the time buffer for activity
|
||||
// tracking.
|
||||
if (toRenameIndex) {
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/dyn_inst_ptr.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
|
||||
@@ -61,7 +62,6 @@ class FullO3CPU;
|
||||
* actually decoded when the StaticInst is created, this stage does
|
||||
* not do much other than check any PC-relative branches.
|
||||
*/
|
||||
template<class Impl>
|
||||
class DefaultDecode
|
||||
{
|
||||
public:
|
||||
@@ -94,7 +94,7 @@ class DefaultDecode
|
||||
|
||||
public:
|
||||
/** DefaultDecode constructor. */
|
||||
DefaultDecode(FullO3CPU<Impl> *_cpu, const DerivO3CPUParams ¶ms);
|
||||
DefaultDecode(FullO3CPU<O3CPUImpl> *_cpu, const DerivO3CPUParams ¶ms);
|
||||
|
||||
void startupStage();
|
||||
|
||||
@@ -173,7 +173,7 @@ class DefaultDecode
|
||||
bool checkStall(ThreadID tid) const;
|
||||
|
||||
/** Returns if there any instructions from fetch on this cycle. */
|
||||
inline bool fetchInstsValid();
|
||||
bool fetchInstsValid();
|
||||
|
||||
/** Switches decode to blocking, and signals back that decode has
|
||||
* become blocked.
|
||||
@@ -201,35 +201,35 @@ class DefaultDecode
|
||||
private:
|
||||
// Interfaces to objects outside of decode.
|
||||
/** CPU interface. */
|
||||
FullO3CPU<Impl> *cpu;
|
||||
FullO3CPU<O3CPUImpl> *cpu;
|
||||
|
||||
/** Time buffer interface. */
|
||||
TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to get rename's output from backwards time buffer. */
|
||||
typename TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
|
||||
TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
|
||||
|
||||
/** Wire to get iew's information from backwards time buffer. */
|
||||
typename TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
|
||||
TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
|
||||
|
||||
/** Wire to get commit's information from backwards time buffer. */
|
||||
typename TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
|
||||
TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
// Might not be the best name as not only fetch will read it.
|
||||
typename TimeBuffer<O3Comm::TimeStruct>::wire toFetch;
|
||||
TimeBuffer<O3Comm::TimeStruct>::wire toFetch;
|
||||
|
||||
/** Decode instruction queue. */
|
||||
TimeBuffer<O3Comm::DecodeStruct> *decodeQueue;
|
||||
|
||||
/** Wire used to write any information heading to rename. */
|
||||
typename TimeBuffer<O3Comm::DecodeStruct>::wire toRename;
|
||||
TimeBuffer<O3Comm::DecodeStruct>::wire toRename;
|
||||
|
||||
/** Fetch instruction queue interface. */
|
||||
TimeBuffer<O3Comm::FetchStruct> *fetchQueue;
|
||||
|
||||
/** Wire to get fetch's output from fetch queue. */
|
||||
typename TimeBuffer<O3Comm::FetchStruct>::wire fromFetch;
|
||||
TimeBuffer<O3Comm::FetchStruct>::wire fromFetch;
|
||||
|
||||
/** Queue of all instructions coming from fetch this cycle. */
|
||||
std::queue<O3DynInstPtr> insts[O3MaxThreads];
|
||||
@@ -292,7 +292,7 @@ class DefaultDecode
|
||||
|
||||
struct DecodeStats : public Stats::Group
|
||||
{
|
||||
DecodeStats(FullO3CPU<Impl> *cpu);
|
||||
DecodeStats(FullO3CPU<O3CPUImpl> *cpu);
|
||||
|
||||
/** Stat for total number of idle cycles. */
|
||||
Stats::Scalar idleCycles;
|
||||
|
||||
@@ -1,767 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012, 2014 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_DECODE_IMPL_HH__
|
||||
#define __CPU_O3_DECODE_IMPL_HH__
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/limits.hh"
|
||||
#include "debug/Activity.hh"
|
||||
#include "debug/Decode.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/full_system.hh"
|
||||
|
||||
// clang complains about std::set being overloaded with Packet::set if
|
||||
// we open up the entire namespace std
|
||||
using std::list;
|
||||
|
||||
template<class Impl>
|
||||
DefaultDecode<Impl>::DefaultDecode(FullO3CPU<Impl> *_cpu,
|
||||
const DerivO3CPUParams ¶ms)
|
||||
: cpu(_cpu),
|
||||
renameToDecodeDelay(params.renameToDecodeDelay),
|
||||
iewToDecodeDelay(params.iewToDecodeDelay),
|
||||
commitToDecodeDelay(params.commitToDecodeDelay),
|
||||
fetchToDecodeDelay(params.fetchToDecodeDelay),
|
||||
decodeWidth(params.decodeWidth),
|
||||
numThreads(params.numThreads),
|
||||
stats(_cpu)
|
||||
{
|
||||
if (decodeWidth > O3MaxWidth)
|
||||
fatal("decodeWidth (%d) is larger than compiled limit (%d),\n"
|
||||
"\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
|
||||
decodeWidth, static_cast<int>(O3MaxWidth));
|
||||
|
||||
// @todo: Make into a parameter
|
||||
skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth;
|
||||
for (int tid = 0; tid < O3MaxThreads; tid++) {
|
||||
stalls[tid] = {false};
|
||||
decodeStatus[tid] = Idle;
|
||||
bdelayDoneSeqNum[tid] = 0;
|
||||
squashInst[tid] = nullptr;
|
||||
squashAfterDelaySlot[tid] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::startupStage()
|
||||
{
|
||||
resetStage();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
decodeStatus[tid] = Idle;
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::resetStage()
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
// Setup status, make sure stall signals are clear.
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
decodeStatus[tid] = Idle;
|
||||
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
std::string
|
||||
DefaultDecode<Impl>::name() const
|
||||
{
|
||||
return cpu->name() + ".decode";
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
DefaultDecode<Impl>::DecodeStats::DecodeStats(FullO3CPU<Impl> *cpu)
|
||||
: Stats::Group(cpu, "decode"),
|
||||
ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is idle"),
|
||||
ADD_STAT(blockedCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is blocked"),
|
||||
ADD_STAT(runCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is running"),
|
||||
ADD_STAT(unblockCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is unblocking"),
|
||||
ADD_STAT(squashCycles, Stats::Units::Cycle::get(),
|
||||
"Number of cycles decode is squashing"),
|
||||
ADD_STAT(branchResolved, Stats::Units::Count::get(),
|
||||
"Number of times decode resolved a branch"),
|
||||
ADD_STAT(branchMispred, Stats::Units::Count::get(),
|
||||
"Number of times decode detected a branch misprediction"),
|
||||
ADD_STAT(controlMispred, Stats::Units::Count::get(),
|
||||
"Number of times decode detected an instruction incorrectly "
|
||||
"predicted as a control"),
|
||||
ADD_STAT(decodedInsts, Stats::Units::Count::get(),
|
||||
"Number of instructions handled by decode"),
|
||||
ADD_STAT(squashedInsts, Stats::Units::Count::get(),
|
||||
"Number of squashed instructions handled by decode")
|
||||
{
|
||||
idleCycles.prereq(idleCycles);
|
||||
blockedCycles.prereq(blockedCycles);
|
||||
runCycles.prereq(runCycles);
|
||||
unblockCycles.prereq(unblockCycles);
|
||||
squashCycles.prereq(squashCycles);
|
||||
branchResolved.prereq(branchResolved);
|
||||
branchMispred.prereq(branchMispred);
|
||||
controlMispred.prereq(controlMispred);
|
||||
decodedInsts.prereq(decodedInsts);
|
||||
squashedInsts.prereq(squashedInsts);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *tb_ptr)
|
||||
{
|
||||
timeBuffer = tb_ptr;
|
||||
|
||||
// Setup wire to write information back to fetch.
|
||||
toFetch = timeBuffer->getWire(0);
|
||||
|
||||
// Create wires to get information from proper places in time buffer.
|
||||
fromRename = timeBuffer->getWire(-renameToDecodeDelay);
|
||||
fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
|
||||
fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<O3Comm::DecodeStruct> *dq_ptr)
|
||||
{
|
||||
decodeQueue = dq_ptr;
|
||||
|
||||
// Setup wire to write information to proper place in decode queue.
|
||||
toRename = decodeQueue->getWire(0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *fq_ptr)
|
||||
{
|
||||
fetchQueue = fq_ptr;
|
||||
|
||||
// Setup wire to read information from fetch queue.
|
||||
fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
|
||||
{
|
||||
activeThreads = at_ptr;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::drainSanityCheck() const
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
assert(insts[tid].empty());
|
||||
assert(skidBuffer[tid].empty());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::isDrained() const
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
if (!insts[tid].empty() || !skidBuffer[tid].empty() ||
|
||||
(decodeStatus[tid] != Running && decodeStatus[tid] != Idle))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::checkStall(ThreadID tid) const
|
||||
{
|
||||
bool ret_val = false;
|
||||
|
||||
if (stalls[tid].rename) {
|
||||
DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid);
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
inline bool
|
||||
DefaultDecode<Impl>::fetchInstsValid()
|
||||
{
|
||||
return fromFetch->size > 0;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::block(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] Blocking.\n", tid);
|
||||
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
|
||||
// If the decode status is blocked or unblocking then decode has not yet
|
||||
// signalled fetch to unblock. In that case, there is no need to tell
|
||||
// fetch to block.
|
||||
if (decodeStatus[tid] != Blocked) {
|
||||
// Set the status to Blocked.
|
||||
decodeStatus[tid] = Blocked;
|
||||
|
||||
if (toFetch->decodeUnblock[tid]) {
|
||||
toFetch->decodeUnblock[tid] = false;
|
||||
} else {
|
||||
toFetch->decodeBlock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::unblock(ThreadID tid)
|
||||
{
|
||||
// Decode is done unblocking only if the skid buffer is empty.
|
||||
if (skidBuffer[tid].empty()) {
|
||||
DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid);
|
||||
toFetch->decodeUnblock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
return true;
|
||||
}
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::squash(const O3DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch "
|
||||
"prediction detected at decode.\n", tid, inst->seqNum);
|
||||
|
||||
// Send back mispredict information.
|
||||
toFetch->decodeInfo[tid].branchMispredict = true;
|
||||
toFetch->decodeInfo[tid].predIncorrect = true;
|
||||
toFetch->decodeInfo[tid].mispredictInst = inst;
|
||||
toFetch->decodeInfo[tid].squash = true;
|
||||
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
||||
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
|
||||
toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching();
|
||||
toFetch->decodeInfo[tid].squashInst = inst;
|
||||
if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) {
|
||||
toFetch->decodeInfo[tid].branchTaken = true;
|
||||
}
|
||||
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
|
||||
// Might have to tell fetch to unblock.
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid &&
|
||||
fromFetch->insts[i]->seqNum > squash_seq_num) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
// Squash instructions up until this one
|
||||
cpu->removeInstsUntil(squash_seq_num, tid);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
unsigned
|
||||
DefaultDecode<Impl>::squash(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i] Squashing.\n",tid);
|
||||
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
if (FullSystem) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
} else {
|
||||
// In syscall emulation, we can have both a block and a squash due
|
||||
// to a syscall in the same cycle. This would cause both signals
|
||||
// to be high. This shouldn't happen in full system.
|
||||
// @todo: Determine if this still happens.
|
||||
if (toFetch->decodeBlock[tid])
|
||||
toFetch->decodeBlock[tid] = 0;
|
||||
else
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
// Go through incoming instructions from fetch and squash them.
|
||||
unsigned squash_count = 0;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
squash_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
return squash_count;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::skidInsert(ThreadID tid)
|
||||
{
|
||||
O3DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
|
||||
insts[tid].pop();
|
||||
|
||||
assert(tid == inst->threadNumber);
|
||||
|
||||
skidBuffer[tid].push(inst);
|
||||
|
||||
DPRINTF(Decode,"Inserting [tid:%d][sn:%lli] PC: %s into decode skidBuffer %i\n",
|
||||
inst->threadNumber, inst->seqNum, inst->pcState(), skidBuffer[tid].size());
|
||||
}
|
||||
|
||||
// @todo: Eventually need to enforce this by not letting a thread
|
||||
// fetch past its skidbuffer
|
||||
assert(skidBuffer[tid].size() <= skidBufferMax);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::skidsEmpty()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
if (!skidBuffer[tid].empty())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::updateStatus()
|
||||
{
|
||||
bool any_unblocking = false;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (decodeStatus[tid] == Unblocking) {
|
||||
any_unblocking = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode will have activity if it's unblocking.
|
||||
if (any_unblocking) {
|
||||
if (_status == Inactive) {
|
||||
_status = Active;
|
||||
|
||||
DPRINTF(Activity, "Activating stage.\n");
|
||||
|
||||
cpu->activateStage(FullO3CPU<Impl>::DecodeIdx);
|
||||
}
|
||||
} else {
|
||||
// If it's not unblocking, then decode will not have any internal
|
||||
// activity. Switch it to inactive.
|
||||
if (_status == Active) {
|
||||
_status = Inactive;
|
||||
DPRINTF(Activity, "Deactivating stage.\n");
|
||||
|
||||
cpu->deactivateStage(FullO3CPU<Impl>::DecodeIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_fetch = fromFetch->size;
|
||||
for (int i = 0; i < insts_from_fetch; ++i) {
|
||||
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::readStallSignals(ThreadID tid)
|
||||
{
|
||||
if (fromRename->renameBlock[tid]) {
|
||||
stalls[tid].rename = true;
|
||||
}
|
||||
|
||||
if (fromRename->renameUnblock[tid]) {
|
||||
assert(stalls[tid].rename);
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::checkSignalsAndUpdate(ThreadID tid)
|
||||
{
|
||||
// Check if there's a squash signal, squash if there is.
|
||||
// Check stall signals, block if necessary.
|
||||
// If status was blocked
|
||||
// Check if stall conditions have passed
|
||||
// if so then go to unblocking
|
||||
// If status was Squashing
|
||||
// check if squashing is not high. Switch to running this cycle.
|
||||
|
||||
// Update the per thread stall statuses.
|
||||
readStallSignals(tid);
|
||||
|
||||
// Check squash signals from commit.
|
||||
if (fromCommit->commitInfo[tid].squash) {
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash "
|
||||
"from commit.\n", tid);
|
||||
|
||||
squash(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (checkStall(tid)) {
|
||||
return block(tid);
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Unblocking;
|
||||
|
||||
unblock(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Squashing) {
|
||||
// Switch status to running if decode isn't being told to block or
|
||||
// squash this cycle.
|
||||
DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we've reached this point, we have not gotten any signals that
|
||||
// cause decode to change its status. Decode remains the same as before.
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::tick()
|
||||
{
|
||||
wroteToTimeBuffer = false;
|
||||
|
||||
bool status_change = false;
|
||||
|
||||
toRenameIndex = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
sortInsts();
|
||||
|
||||
//Check stall and squash signals.
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
DPRINTF(Decode,"Processing [tid:%i]\n",tid);
|
||||
status_change = checkSignalsAndUpdate(tid) || status_change;
|
||||
|
||||
decode(status_change, tid);
|
||||
}
|
||||
|
||||
if (status_change) {
|
||||
updateStatus();
|
||||
}
|
||||
|
||||
if (wroteToTimeBuffer) {
|
||||
DPRINTF(Activity, "Activity this cycle.\n");
|
||||
|
||||
cpu->activityThisCycle();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::decode(bool &status_change, ThreadID tid)
|
||||
{
|
||||
// If status is Running or idle,
|
||||
// call decodeInsts()
|
||||
// If status is Unblocking,
|
||||
// buffer any instructions coming from fetch
|
||||
// continue trying to empty skid buffer
|
||||
// check if stall conditions have passed
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
++stats.blockedCycles;
|
||||
} else if (decodeStatus[tid] == Squashing) {
|
||||
++stats.squashCycles;
|
||||
}
|
||||
|
||||
// Decode should try to decode as many instructions as its bandwidth
|
||||
// will allow, as long as it is not currently blocked.
|
||||
if (decodeStatus[tid] == Running ||
|
||||
decodeStatus[tid] == Idle) {
|
||||
DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run "
|
||||
"stage.\n",tid);
|
||||
|
||||
decodeInsts(tid);
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
// Make sure that the skid buffer has something in it if the
|
||||
// status is unblocking.
|
||||
assert(!skidsEmpty());
|
||||
|
||||
// If the status was unblocking, then instructions from the skid
|
||||
// buffer were used. Remove those instructions and handle
|
||||
// the rest of unblocking.
|
||||
decodeInsts(tid);
|
||||
|
||||
if (fetchInstsValid()) {
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
}
|
||||
|
||||
status_change = unblock(tid) || status_change;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::decodeInsts(ThreadID tid)
|
||||
{
|
||||
// Instructions can come either from the skid buffer or the list of
|
||||
// instructions coming from fetch, depending on decode's status.
|
||||
int insts_available = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid].size() : insts[tid].size();
|
||||
|
||||
if (insts_available == 0) {
|
||||
DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out"
|
||||
" early.\n",tid);
|
||||
// Should I change the status to idle?
|
||||
++stats.idleCycles;
|
||||
return;
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid "
|
||||
"buffer.\n",tid);
|
||||
++stats.unblockCycles;
|
||||
} else if (decodeStatus[tid] == Running) {
|
||||
++stats.runCycles;
|
||||
}
|
||||
|
||||
std::queue<O3DynInstPtr>
|
||||
&insts_to_decode = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid] : insts[tid];
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid);
|
||||
|
||||
while (insts_available > 0 && toRenameIndex < decodeWidth) {
|
||||
assert(!insts_to_decode.empty());
|
||||
|
||||
O3DynInstPtr inst = std::move(insts_to_decode.front());
|
||||
|
||||
insts_to_decode.pop();
|
||||
|
||||
DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with "
|
||||
"PC %s\n", tid, inst->seqNum, inst->pcState());
|
||||
|
||||
if (inst->isSquashed()) {
|
||||
DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is "
|
||||
"squashed, skipping.\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
|
||||
++stats.squashedInsts;
|
||||
|
||||
--insts_available;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Also check if instructions have no source registers. Mark
|
||||
// them as ready to issue at any time. Not sure if this check
|
||||
// should exist here or at a later stage; however it doesn't matter
|
||||
// too much for function correctness.
|
||||
if (inst->numSrcRegs() == 0) {
|
||||
inst->setCanIssue();
|
||||
}
|
||||
|
||||
// This current instruction is valid, so add it into the decode
|
||||
// queue. The next instruction may not be valid, so check to
|
||||
// see if branches were predicted correctly.
|
||||
toRename->insts[toRenameIndex] = inst;
|
||||
|
||||
++(toRename->size);
|
||||
++toRenameIndex;
|
||||
++stats.decodedInsts;
|
||||
--insts_available;
|
||||
|
||||
#if TRACING_ON
|
||||
if (Debug::O3PipeView) {
|
||||
inst->decodeTick = curTick() - inst->fetchTick;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Ensure that if it was predicted as a branch, it really is a
|
||||
// branch.
|
||||
if (inst->readPredTaken() && !inst->isControl()) {
|
||||
panic("Instruction predicted as a branch!");
|
||||
|
||||
++stats.controlMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Go ahead and compute any PC-relative branches.
|
||||
// This includes direct unconditional control and
|
||||
// direct conditional control that is predicted taken.
|
||||
if (inst->isDirectCtrl() &&
|
||||
(inst->isUncondCtrl() || inst->readPredTaken()))
|
||||
{
|
||||
++stats.branchResolved;
|
||||
|
||||
if (!(inst->branchTarget() == inst->readPredTarg())) {
|
||||
++stats.branchMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
TheISA::PCState target = inst->branchTarget();
|
||||
|
||||
DPRINTF(Decode,
|
||||
"[tid:%i] [sn:%llu] "
|
||||
"Updating predictions: Wrong predicted target: %s \
|
||||
PredPC: %s\n",
|
||||
tid, inst->seqNum, inst->readPredTarg(), target);
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't process all instructions, then we will need to block
|
||||
// and put all those instructions into the skid buffer.
|
||||
if (!insts_to_decode.empty()) {
|
||||
block(tid);
|
||||
}
|
||||
|
||||
// Record that decode has written to the time buffer for activity
|
||||
// tracking.
|
||||
if (toRenameIndex) {
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
}
|
||||
|
||||
#endif//__CPU_O3_DECODE_IMPL_HH__
|
||||
Reference in New Issue
Block a user