From da4c0590eec6b88e6bea69ff4a2d73f8160b3e36 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 3 Mar 2021 03:19:32 -0800 Subject: [PATCH] cpu: De-templatize the O3 DefaultDecode. Change-Id: If2cef59654db4f5ff8e7cb73d1951895e5e12c9d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42110 Tested-by: kokoro Reviewed-by: Gabe Black Maintainer: Gabe Black --- src/cpu/o3/cpu.hh | 2 +- src/cpu/o3/decode.cc | 713 ++++++++++++++++++++++++++++++++++- src/cpu/o3/decode.hh | 22 +- src/cpu/o3/decode_impl.hh | 767 -------------------------------------- 4 files changed, 721 insertions(+), 783 deletions(-) delete mode 100644 src/cpu/o3/decode_impl.hh diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index fdbd1fffe6..4825facf13 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -494,7 +494,7 @@ class FullO3CPU : public BaseO3CPU DefaultFetch fetch; /** The decode stage. */ - DefaultDecode decode; + DefaultDecode decode; /** The dispatch stage. */ DefaultRename rename; diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc index b6cce41b03..073a9a1e27 100644 --- a/src/cpu/o3/decode.cc +++ b/src/cpu/o3/decode.cc @@ -1,5 +1,17 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2012, 2014 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,7 +38,700 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "cpu/o3/decode_impl.hh" -#include "cpu/o3/isa_specific.hh" +#include "cpu/o3/decode.hh" -template class DefaultDecode; +#include "arch/types.hh" +#include "base/trace.hh" +#include "config/the_isa.hh" +#include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/limits.hh" +#include "debug/Activity.hh" +#include "debug/Decode.hh" +#include "debug/O3PipeView.hh" +#include "params/DerivO3CPU.hh" +#include "sim/full_system.hh" + +// clang complains about std::set being overloaded with Packet::set if +// we open up the entire namespace std +using std::list; + +DefaultDecode::DefaultDecode(FullO3CPU *_cpu, + const DerivO3CPUParams ¶ms) + : cpu(_cpu), + renameToDecodeDelay(params.renameToDecodeDelay), + iewToDecodeDelay(params.iewToDecodeDelay), + commitToDecodeDelay(params.commitToDecodeDelay), + fetchToDecodeDelay(params.fetchToDecodeDelay), + decodeWidth(params.decodeWidth), + numThreads(params.numThreads), + stats(_cpu) +{ + if (decodeWidth > O3MaxWidth) + fatal("decodeWidth (%d) is larger than compiled limit (%d),\n" + "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n", + decodeWidth, static_cast(O3MaxWidth)); + + // @todo: Make into a parameter + skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth; + for (int tid = 0; tid < O3MaxThreads; tid++) { + stalls[tid] = {false}; + decodeStatus[tid] = Idle; + bdelayDoneSeqNum[tid] = 0; + squashInst[tid] = nullptr; + squashAfterDelaySlot[tid] = 0; + } +} + +void +DefaultDecode::startupStage() +{ + resetStage(); +} + +void +DefaultDecode::clearStates(ThreadID tid) +{ + decodeStatus[tid] = Idle; + stalls[tid].rename = false; +} + +void +DefaultDecode::resetStage() +{ + _status = Inactive; + + // Setup status, make sure stall signals are clear. + for (ThreadID tid = 0; tid < numThreads; ++tid) { + decodeStatus[tid] = Idle; + + stalls[tid].rename = false; + } +} + +std::string +DefaultDecode::name() const +{ + return cpu->name() + ".decode"; +} + +DefaultDecode::DecodeStats::DecodeStats(FullO3CPU *cpu) + : Stats::Group(cpu, "decode"), + ADD_STAT(idleCycles, Stats::Units::Cycle::get(), + "Number of cycles decode is idle"), + ADD_STAT(blockedCycles, Stats::Units::Cycle::get(), + "Number of cycles decode is blocked"), + ADD_STAT(runCycles, Stats::Units::Cycle::get(), + "Number of cycles decode is running"), + ADD_STAT(unblockCycles, Stats::Units::Cycle::get(), + "Number of cycles decode is unblocking"), + ADD_STAT(squashCycles, Stats::Units::Cycle::get(), + "Number of cycles decode is squashing"), + ADD_STAT(branchResolved, Stats::Units::Count::get(), + "Number of times decode resolved a branch"), + ADD_STAT(branchMispred, Stats::Units::Count::get(), + "Number of times decode detected a branch misprediction"), + ADD_STAT(controlMispred, Stats::Units::Count::get(), + "Number of times decode detected an instruction incorrectly " + "predicted as a control"), + ADD_STAT(decodedInsts, Stats::Units::Count::get(), + "Number of instructions handled by decode"), + ADD_STAT(squashedInsts, Stats::Units::Count::get(), + "Number of squashed instructions handled by decode") +{ + idleCycles.prereq(idleCycles); + blockedCycles.prereq(blockedCycles); + runCycles.prereq(runCycles); + unblockCycles.prereq(unblockCycles); + squashCycles.prereq(squashCycles); + branchResolved.prereq(branchResolved); + branchMispred.prereq(branchMispred); + controlMispred.prereq(controlMispred); + decodedInsts.prereq(decodedInsts); + squashedInsts.prereq(squashedInsts); +} + +void +DefaultDecode::setTimeBuffer(TimeBuffer *tb_ptr) +{ + timeBuffer = tb_ptr; + + // Setup wire to write information back to fetch. + toFetch = timeBuffer->getWire(0); + + // Create wires to get information from proper places in time buffer. + fromRename = timeBuffer->getWire(-renameToDecodeDelay); + fromIEW = timeBuffer->getWire(-iewToDecodeDelay); + fromCommit = timeBuffer->getWire(-commitToDecodeDelay); +} + +void +DefaultDecode::setDecodeQueue(TimeBuffer *dq_ptr) +{ + decodeQueue = dq_ptr; + + // Setup wire to write information to proper place in decode queue. + toRename = decodeQueue->getWire(0); +} + +void +DefaultDecode::setFetchQueue(TimeBuffer *fq_ptr) +{ + fetchQueue = fq_ptr; + + // Setup wire to read information from fetch queue. + fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); +} + +void +DefaultDecode::setActiveThreads(std::list *at_ptr) +{ + activeThreads = at_ptr; +} + +void +DefaultDecode::drainSanityCheck() const +{ + for (ThreadID tid = 0; tid < numThreads; ++tid) { + assert(insts[tid].empty()); + assert(skidBuffer[tid].empty()); + } +} + +bool +DefaultDecode::isDrained() const +{ + for (ThreadID tid = 0; tid < numThreads; ++tid) { + if (!insts[tid].empty() || !skidBuffer[tid].empty() || + (decodeStatus[tid] != Running && decodeStatus[tid] != Idle)) + return false; + } + return true; +} + +bool +DefaultDecode::checkStall(ThreadID tid) const +{ + bool ret_val = false; + + if (stalls[tid].rename) { + DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid); + ret_val = true; + } + + return ret_val; +} + +bool +DefaultDecode::fetchInstsValid() +{ + return fromFetch->size > 0; +} + +bool +DefaultDecode::block(ThreadID tid) +{ + DPRINTF(Decode, "[tid:%i] Blocking.\n", tid); + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidInsert(tid); + + // If the decode status is blocked or unblocking then decode has not yet + // signalled fetch to unblock. In that case, there is no need to tell + // fetch to block. + if (decodeStatus[tid] != Blocked) { + // Set the status to Blocked. + decodeStatus[tid] = Blocked; + + if (toFetch->decodeUnblock[tid]) { + toFetch->decodeUnblock[tid] = false; + } else { + toFetch->decodeBlock[tid] = true; + wroteToTimeBuffer = true; + } + + return true; + } + + return false; +} + +bool +DefaultDecode::unblock(ThreadID tid) +{ + // Decode is done unblocking only if the skid buffer is empty. + if (skidBuffer[tid].empty()) { + DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid); + toFetch->decodeUnblock[tid] = true; + wroteToTimeBuffer = true; + + decodeStatus[tid] = Running; + return true; + } + + DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid); + + return false; +} + +void +DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid) +{ + DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch " + "prediction detected at decode.\n", tid, inst->seqNum); + + // Send back mispredict information. + toFetch->decodeInfo[tid].branchMispredict = true; + toFetch->decodeInfo[tid].predIncorrect = true; + toFetch->decodeInfo[tid].mispredictInst = inst; + toFetch->decodeInfo[tid].squash = true; + toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; + toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); + toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching(); + toFetch->decodeInfo[tid].squashInst = inst; + if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) { + toFetch->decodeInfo[tid].branchTaken = true; + } + + InstSeqNum squash_seq_num = inst->seqNum; + + // Might have to tell fetch to unblock. + if (decodeStatus[tid] == Blocked || + decodeStatus[tid] == Unblocking) { + toFetch->decodeUnblock[tid] = 1; + } + + // Set status to squashing. + decodeStatus[tid] = Squashing; + + for (int i=0; isize; i++) { + if (fromFetch->insts[i]->threadNumber == tid && + fromFetch->insts[i]->seqNum > squash_seq_num) { + fromFetch->insts[i]->setSquashed(); + } + } + + // Clear the instruction list and skid buffer in case they have any + // insts in them. + while (!insts[tid].empty()) { + insts[tid].pop(); + } + + while (!skidBuffer[tid].empty()) { + skidBuffer[tid].pop(); + } + + // Squash instructions up until this one + cpu->removeInstsUntil(squash_seq_num, tid); +} + +unsigned +DefaultDecode::squash(ThreadID tid) +{ + DPRINTF(Decode, "[tid:%i] Squashing.\n",tid); + + if (decodeStatus[tid] == Blocked || + decodeStatus[tid] == Unblocking) { + if (FullSystem) { + toFetch->decodeUnblock[tid] = 1; + } else { + // In syscall emulation, we can have both a block and a squash due + // to a syscall in the same cycle. This would cause both signals + // to be high. This shouldn't happen in full system. + // @todo: Determine if this still happens. + if (toFetch->decodeBlock[tid]) + toFetch->decodeBlock[tid] = 0; + else + toFetch->decodeUnblock[tid] = 1; + } + } + + // Set status to squashing. + decodeStatus[tid] = Squashing; + + // Go through incoming instructions from fetch and squash them. + unsigned squash_count = 0; + + for (int i=0; isize; i++) { + if (fromFetch->insts[i]->threadNumber == tid) { + fromFetch->insts[i]->setSquashed(); + squash_count++; + } + } + + // Clear the instruction list and skid buffer in case they have any + // insts in them. + while (!insts[tid].empty()) { + insts[tid].pop(); + } + + while (!skidBuffer[tid].empty()) { + skidBuffer[tid].pop(); + } + + return squash_count; +} + +void +DefaultDecode::skidInsert(ThreadID tid) +{ + O3DynInstPtr inst = NULL; + + while (!insts[tid].empty()) { + inst = insts[tid].front(); + + insts[tid].pop(); + + assert(tid == inst->threadNumber); + + skidBuffer[tid].push(inst); + + DPRINTF(Decode, "Inserting [tid:%d][sn:%lli] PC: %s into decode " + "skidBuffer %i\n", inst->threadNumber, inst->seqNum, + inst->pcState(), skidBuffer[tid].size()); + } + + // @todo: Eventually need to enforce this by not letting a thread + // fetch past its skidbuffer + assert(skidBuffer[tid].size() <= skidBufferMax); +} + +bool +DefaultDecode::skidsEmpty() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + if (!skidBuffer[tid].empty()) + return false; + } + + return true; +} + +void +DefaultDecode::updateStatus() +{ + bool any_unblocking = false; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (decodeStatus[tid] == Unblocking) { + any_unblocking = true; + break; + } + } + + // Decode will have activity if it's unblocking. + if (any_unblocking) { + if (_status == Inactive) { + _status = Active; + + DPRINTF(Activity, "Activating stage.\n"); + + cpu->activateStage(FullO3CPU::DecodeIdx); + } + } else { + // If it's not unblocking, then decode will not have any internal + // activity. Switch it to inactive. + if (_status == Active) { + _status = Inactive; + DPRINTF(Activity, "Deactivating stage.\n"); + + cpu->deactivateStage(FullO3CPU::DecodeIdx); + } + } +} + +void +DefaultDecode::sortInsts() +{ + int insts_from_fetch = fromFetch->size; + for (int i = 0; i < insts_from_fetch; ++i) { + insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]); + } +} + +void +DefaultDecode::readStallSignals(ThreadID tid) +{ + if (fromRename->renameBlock[tid]) { + stalls[tid].rename = true; + } + + if (fromRename->renameUnblock[tid]) { + assert(stalls[tid].rename); + stalls[tid].rename = false; + } +} + +bool +DefaultDecode::checkSignalsAndUpdate(ThreadID tid) +{ + // Check if there's a squash signal, squash if there is. + // Check stall signals, block if necessary. + // If status was blocked + // Check if stall conditions have passed + // if so then go to unblocking + // If status was Squashing + // check if squashing is not high. Switch to running this cycle. + + // Update the per thread stall statuses. + readStallSignals(tid); + + // Check squash signals from commit. + if (fromCommit->commitInfo[tid].squash) { + + DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash " + "from commit.\n", tid); + + squash(tid); + + return true; + } + + if (checkStall(tid)) { + return block(tid); + } + + if (decodeStatus[tid] == Blocked) { + DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n", + tid); + + decodeStatus[tid] = Unblocking; + + unblock(tid); + + return true; + } + + if (decodeStatus[tid] == Squashing) { + // Switch status to running if decode isn't being told to block or + // squash this cycle. + DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n", + tid); + + decodeStatus[tid] = Running; + + return false; + } + + // If we've reached this point, we have not gotten any signals that + // cause decode to change its status. Decode remains the same as before. + return false; +} + +void +DefaultDecode::tick() +{ + wroteToTimeBuffer = false; + + bool status_change = false; + + toRenameIndex = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + sortInsts(); + + //Check stall and squash signals. + while (threads != end) { + ThreadID tid = *threads++; + + DPRINTF(Decode,"Processing [tid:%i]\n",tid); + status_change = checkSignalsAndUpdate(tid) || status_change; + + decode(status_change, tid); + } + + if (status_change) { + updateStatus(); + } + + if (wroteToTimeBuffer) { + DPRINTF(Activity, "Activity this cycle.\n"); + + cpu->activityThisCycle(); + } +} + +void +DefaultDecode::decode(bool &status_change, ThreadID tid) +{ + // If status is Running or idle, + // call decodeInsts() + // If status is Unblocking, + // buffer any instructions coming from fetch + // continue trying to empty skid buffer + // check if stall conditions have passed + + if (decodeStatus[tid] == Blocked) { + ++stats.blockedCycles; + } else if (decodeStatus[tid] == Squashing) { + ++stats.squashCycles; + } + + // Decode should try to decode as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. + if (decodeStatus[tid] == Running || + decodeStatus[tid] == Idle) { + DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run " + "stage.\n",tid); + + decodeInsts(tid); + } else if (decodeStatus[tid] == Unblocking) { + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(!skidsEmpty()); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + decodeInsts(tid); + + if (fetchInstsValid()) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidInsert(tid); + } + + status_change = unblock(tid) || status_change; + } +} + +void +DefaultDecode::decodeInsts(ThreadID tid) +{ + // Instructions can come either from the skid buffer or the list of + // instructions coming from fetch, depending on decode's status. + int insts_available = decodeStatus[tid] == Unblocking ? + skidBuffer[tid].size() : insts[tid].size(); + + if (insts_available == 0) { + DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out" + " early.\n",tid); + // Should I change the status to idle? + ++stats.idleCycles; + return; + } else if (decodeStatus[tid] == Unblocking) { + DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid " + "buffer.\n",tid); + ++stats.unblockCycles; + } else if (decodeStatus[tid] == Running) { + ++stats.runCycles; + } + + std::queue + &insts_to_decode = decodeStatus[tid] == Unblocking ? + skidBuffer[tid] : insts[tid]; + + DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid); + + while (insts_available > 0 && toRenameIndex < decodeWidth) { + assert(!insts_to_decode.empty()); + + O3DynInstPtr inst = std::move(insts_to_decode.front()); + + insts_to_decode.pop(); + + DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with " + "PC %s\n", tid, inst->seqNum, inst->pcState()); + + if (inst->isSquashed()) { + DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is " + "squashed, skipping.\n", + tid, inst->seqNum, inst->pcState()); + + ++stats.squashedInsts; + + --insts_available; + + continue; + } + + // Also check if instructions have no source registers. Mark + // them as ready to issue at any time. Not sure if this check + // should exist here or at a later stage; however it doesn't matter + // too much for function correctness. + if (inst->numSrcRegs() == 0) { + inst->setCanIssue(); + } + + // This current instruction is valid, so add it into the decode + // queue. The next instruction may not be valid, so check to + // see if branches were predicted correctly. + toRename->insts[toRenameIndex] = inst; + + ++(toRename->size); + ++toRenameIndex; + ++stats.decodedInsts; + --insts_available; + +#if TRACING_ON + if (Debug::O3PipeView) { + inst->decodeTick = curTick() - inst->fetchTick; + } +#endif + + // Ensure that if it was predicted as a branch, it really is a + // branch. + if (inst->readPredTaken() && !inst->isControl()) { + panic("Instruction predicted as a branch!"); + + ++stats.controlMispred; + + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst, inst->threadNumber); + + break; + } + + // Go ahead and compute any PC-relative branches. + // This includes direct unconditional control and + // direct conditional control that is predicted taken. + if (inst->isDirectCtrl() && + (inst->isUncondCtrl() || inst->readPredTaken())) + { + ++stats.branchResolved; + + if (!(inst->branchTarget() == inst->readPredTarg())) { + ++stats.branchMispred; + + // Might want to set some sort of boolean and just do + // a check at the end + squash(inst, inst->threadNumber); + TheISA::PCState target = inst->branchTarget(); + + DPRINTF(Decode, + "[tid:%i] [sn:%llu] " + "Updating predictions: Wrong predicted target: %s \ + PredPC: %s\n", + tid, inst->seqNum, inst->readPredTarg(), target); + //The micro pc after an instruction level branch should be 0 + inst->setPredTarg(target); + break; + } + } + } + + // If we didn't process all instructions, then we will need to block + // and put all those instructions into the skid buffer. + if (!insts_to_decode.empty()) { + block(tid); + } + + // Record that decode has written to the time buffer for activity + // tracking. + if (toRenameIndex) { + wroteToTimeBuffer = true; + } +} diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index 20cb467d64..1f37617015 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -46,6 +46,7 @@ #include "base/statistics.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/impl.hh" #include "cpu/o3/limits.hh" #include "cpu/timebuf.hh" @@ -61,7 +62,6 @@ class FullO3CPU; * actually decoded when the StaticInst is created, this stage does * not do much other than check any PC-relative branches. */ -template class DefaultDecode { public: @@ -94,7 +94,7 @@ class DefaultDecode public: /** DefaultDecode constructor. */ - DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams ¶ms); + DefaultDecode(FullO3CPU *_cpu, const DerivO3CPUParams ¶ms); void startupStage(); @@ -173,7 +173,7 @@ class DefaultDecode bool checkStall(ThreadID tid) const; /** Returns if there any instructions from fetch on this cycle. */ - inline bool fetchInstsValid(); + bool fetchInstsValid(); /** Switches decode to blocking, and signals back that decode has * become blocked. @@ -201,35 +201,35 @@ class DefaultDecode private: // Interfaces to objects outside of decode. /** CPU interface. */ - FullO3CPU *cpu; + FullO3CPU *cpu; /** Time buffer interface. */ TimeBuffer *timeBuffer; /** Wire to get rename's output from backwards time buffer. */ - typename TimeBuffer::wire fromRename; + TimeBuffer::wire fromRename; /** Wire to get iew's information from backwards time buffer. */ - typename TimeBuffer::wire fromIEW; + TimeBuffer::wire fromIEW; /** Wire to get commit's information from backwards time buffer. */ - typename TimeBuffer::wire fromCommit; + TimeBuffer::wire fromCommit; /** Wire to write information heading to previous stages. */ // Might not be the best name as not only fetch will read it. - typename TimeBuffer::wire toFetch; + TimeBuffer::wire toFetch; /** Decode instruction queue. */ TimeBuffer *decodeQueue; /** Wire used to write any information heading to rename. */ - typename TimeBuffer::wire toRename; + TimeBuffer::wire toRename; /** Fetch instruction queue interface. */ TimeBuffer *fetchQueue; /** Wire to get fetch's output from fetch queue. */ - typename TimeBuffer::wire fromFetch; + TimeBuffer::wire fromFetch; /** Queue of all instructions coming from fetch this cycle. */ std::queue insts[O3MaxThreads]; @@ -292,7 +292,7 @@ class DefaultDecode struct DecodeStats : public Stats::Group { - DecodeStats(FullO3CPU *cpu); + DecodeStats(FullO3CPU *cpu); /** Stat for total number of idle cycles. */ Stats::Scalar idleCycles; diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh deleted file mode 100644 index 90feb1c9df..0000000000 --- a/src/cpu/o3/decode_impl.hh +++ /dev/null @@ -1,767 +0,0 @@ -/* - * Copyright (c) 2012, 2014 ARM Limited - * All rights reserved - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder. You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_O3_DECODE_IMPL_HH__ -#define __CPU_O3_DECODE_IMPL_HH__ - -#include "arch/types.hh" -#include "base/trace.hh" -#include "config/the_isa.hh" -#include "cpu/inst_seq.hh" -#include "cpu/o3/decode.hh" -#include "cpu/o3/dyn_inst.hh" -#include "cpu/o3/limits.hh" -#include "debug/Activity.hh" -#include "debug/Decode.hh" -#include "debug/O3PipeView.hh" -#include "params/DerivO3CPU.hh" -#include "sim/full_system.hh" - -// clang complains about std::set being overloaded with Packet::set if -// we open up the entire namespace std -using std::list; - -template -DefaultDecode::DefaultDecode(FullO3CPU *_cpu, - const DerivO3CPUParams ¶ms) - : cpu(_cpu), - renameToDecodeDelay(params.renameToDecodeDelay), - iewToDecodeDelay(params.iewToDecodeDelay), - commitToDecodeDelay(params.commitToDecodeDelay), - fetchToDecodeDelay(params.fetchToDecodeDelay), - decodeWidth(params.decodeWidth), - numThreads(params.numThreads), - stats(_cpu) -{ - if (decodeWidth > O3MaxWidth) - fatal("decodeWidth (%d) is larger than compiled limit (%d),\n" - "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n", - decodeWidth, static_cast(O3MaxWidth)); - - // @todo: Make into a parameter - skidBufferMax = (fetchToDecodeDelay + 1) * params.fetchWidth; - for (int tid = 0; tid < O3MaxThreads; tid++) { - stalls[tid] = {false}; - decodeStatus[tid] = Idle; - bdelayDoneSeqNum[tid] = 0; - squashInst[tid] = nullptr; - squashAfterDelaySlot[tid] = 0; - } -} - -template -void -DefaultDecode::startupStage() -{ - resetStage(); -} - -template -void -DefaultDecode::clearStates(ThreadID tid) -{ - decodeStatus[tid] = Idle; - stalls[tid].rename = false; -} - -template -void -DefaultDecode::resetStage() -{ - _status = Inactive; - - // Setup status, make sure stall signals are clear. - for (ThreadID tid = 0; tid < numThreads; ++tid) { - decodeStatus[tid] = Idle; - - stalls[tid].rename = false; - } -} - -template -std::string -DefaultDecode::name() const -{ - return cpu->name() + ".decode"; -} - -template -DefaultDecode::DecodeStats::DecodeStats(FullO3CPU *cpu) - : Stats::Group(cpu, "decode"), - ADD_STAT(idleCycles, Stats::Units::Cycle::get(), - "Number of cycles decode is idle"), - ADD_STAT(blockedCycles, Stats::Units::Cycle::get(), - "Number of cycles decode is blocked"), - ADD_STAT(runCycles, Stats::Units::Cycle::get(), - "Number of cycles decode is running"), - ADD_STAT(unblockCycles, Stats::Units::Cycle::get(), - "Number of cycles decode is unblocking"), - ADD_STAT(squashCycles, Stats::Units::Cycle::get(), - "Number of cycles decode is squashing"), - ADD_STAT(branchResolved, Stats::Units::Count::get(), - "Number of times decode resolved a branch"), - ADD_STAT(branchMispred, Stats::Units::Count::get(), - "Number of times decode detected a branch misprediction"), - ADD_STAT(controlMispred, Stats::Units::Count::get(), - "Number of times decode detected an instruction incorrectly " - "predicted as a control"), - ADD_STAT(decodedInsts, Stats::Units::Count::get(), - "Number of instructions handled by decode"), - ADD_STAT(squashedInsts, Stats::Units::Count::get(), - "Number of squashed instructions handled by decode") -{ - idleCycles.prereq(idleCycles); - blockedCycles.prereq(blockedCycles); - runCycles.prereq(runCycles); - unblockCycles.prereq(unblockCycles); - squashCycles.prereq(squashCycles); - branchResolved.prereq(branchResolved); - branchMispred.prereq(branchMispred); - controlMispred.prereq(controlMispred); - decodedInsts.prereq(decodedInsts); - squashedInsts.prereq(squashedInsts); -} - -template -void -DefaultDecode::setTimeBuffer(TimeBuffer *tb_ptr) -{ - timeBuffer = tb_ptr; - - // Setup wire to write information back to fetch. - toFetch = timeBuffer->getWire(0); - - // Create wires to get information from proper places in time buffer. - fromRename = timeBuffer->getWire(-renameToDecodeDelay); - fromIEW = timeBuffer->getWire(-iewToDecodeDelay); - fromCommit = timeBuffer->getWire(-commitToDecodeDelay); -} - -template -void -DefaultDecode::setDecodeQueue(TimeBuffer *dq_ptr) -{ - decodeQueue = dq_ptr; - - // Setup wire to write information to proper place in decode queue. - toRename = decodeQueue->getWire(0); -} - -template -void -DefaultDecode::setFetchQueue(TimeBuffer *fq_ptr) -{ - fetchQueue = fq_ptr; - - // Setup wire to read information from fetch queue. - fromFetch = fetchQueue->getWire(-fetchToDecodeDelay); -} - -template -void -DefaultDecode::setActiveThreads(std::list *at_ptr) -{ - activeThreads = at_ptr; -} - -template -void -DefaultDecode::drainSanityCheck() const -{ - for (ThreadID tid = 0; tid < numThreads; ++tid) { - assert(insts[tid].empty()); - assert(skidBuffer[tid].empty()); - } -} - -template -bool -DefaultDecode::isDrained() const -{ - for (ThreadID tid = 0; tid < numThreads; ++tid) { - if (!insts[tid].empty() || !skidBuffer[tid].empty() || - (decodeStatus[tid] != Running && decodeStatus[tid] != Idle)) - return false; - } - return true; -} - -template -bool -DefaultDecode::checkStall(ThreadID tid) const -{ - bool ret_val = false; - - if (stalls[tid].rename) { - DPRINTF(Decode,"[tid:%i] Stall fom Rename stage detected.\n", tid); - ret_val = true; - } - - return ret_val; -} - -template -inline bool -DefaultDecode::fetchInstsValid() -{ - return fromFetch->size > 0; -} - -template -bool -DefaultDecode::block(ThreadID tid) -{ - DPRINTF(Decode, "[tid:%i] Blocking.\n", tid); - - // Add the current inputs to the skid buffer so they can be - // reprocessed when this stage unblocks. - skidInsert(tid); - - // If the decode status is blocked or unblocking then decode has not yet - // signalled fetch to unblock. In that case, there is no need to tell - // fetch to block. - if (decodeStatus[tid] != Blocked) { - // Set the status to Blocked. - decodeStatus[tid] = Blocked; - - if (toFetch->decodeUnblock[tid]) { - toFetch->decodeUnblock[tid] = false; - } else { - toFetch->decodeBlock[tid] = true; - wroteToTimeBuffer = true; - } - - return true; - } - - return false; -} - -template -bool -DefaultDecode::unblock(ThreadID tid) -{ - // Decode is done unblocking only if the skid buffer is empty. - if (skidBuffer[tid].empty()) { - DPRINTF(Decode, "[tid:%i] Done unblocking.\n", tid); - toFetch->decodeUnblock[tid] = true; - wroteToTimeBuffer = true; - - decodeStatus[tid] = Running; - return true; - } - - DPRINTF(Decode, "[tid:%i] Currently unblocking.\n", tid); - - return false; -} - -template -void -DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid) -{ - DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch " - "prediction detected at decode.\n", tid, inst->seqNum); - - // Send back mispredict information. - toFetch->decodeInfo[tid].branchMispredict = true; - toFetch->decodeInfo[tid].predIncorrect = true; - toFetch->decodeInfo[tid].mispredictInst = inst; - toFetch->decodeInfo[tid].squash = true; - toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum; - toFetch->decodeInfo[tid].nextPC = inst->branchTarget(); - toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching(); - toFetch->decodeInfo[tid].squashInst = inst; - if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) { - toFetch->decodeInfo[tid].branchTaken = true; - } - - InstSeqNum squash_seq_num = inst->seqNum; - - // Might have to tell fetch to unblock. - if (decodeStatus[tid] == Blocked || - decodeStatus[tid] == Unblocking) { - toFetch->decodeUnblock[tid] = 1; - } - - // Set status to squashing. - decodeStatus[tid] = Squashing; - - for (int i=0; isize; i++) { - if (fromFetch->insts[i]->threadNumber == tid && - fromFetch->insts[i]->seqNum > squash_seq_num) { - fromFetch->insts[i]->setSquashed(); - } - } - - // Clear the instruction list and skid buffer in case they have any - // insts in them. - while (!insts[tid].empty()) { - insts[tid].pop(); - } - - while (!skidBuffer[tid].empty()) { - skidBuffer[tid].pop(); - } - - // Squash instructions up until this one - cpu->removeInstsUntil(squash_seq_num, tid); -} - -template -unsigned -DefaultDecode::squash(ThreadID tid) -{ - DPRINTF(Decode, "[tid:%i] Squashing.\n",tid); - - if (decodeStatus[tid] == Blocked || - decodeStatus[tid] == Unblocking) { - if (FullSystem) { - toFetch->decodeUnblock[tid] = 1; - } else { - // In syscall emulation, we can have both a block and a squash due - // to a syscall in the same cycle. This would cause both signals - // to be high. This shouldn't happen in full system. - // @todo: Determine if this still happens. - if (toFetch->decodeBlock[tid]) - toFetch->decodeBlock[tid] = 0; - else - toFetch->decodeUnblock[tid] = 1; - } - } - - // Set status to squashing. - decodeStatus[tid] = Squashing; - - // Go through incoming instructions from fetch and squash them. - unsigned squash_count = 0; - - for (int i=0; isize; i++) { - if (fromFetch->insts[i]->threadNumber == tid) { - fromFetch->insts[i]->setSquashed(); - squash_count++; - } - } - - // Clear the instruction list and skid buffer in case they have any - // insts in them. - while (!insts[tid].empty()) { - insts[tid].pop(); - } - - while (!skidBuffer[tid].empty()) { - skidBuffer[tid].pop(); - } - - return squash_count; -} - -template -void -DefaultDecode::skidInsert(ThreadID tid) -{ - O3DynInstPtr inst = NULL; - - while (!insts[tid].empty()) { - inst = insts[tid].front(); - - insts[tid].pop(); - - assert(tid == inst->threadNumber); - - skidBuffer[tid].push(inst); - - DPRINTF(Decode,"Inserting [tid:%d][sn:%lli] PC: %s into decode skidBuffer %i\n", - inst->threadNumber, inst->seqNum, inst->pcState(), skidBuffer[tid].size()); - } - - // @todo: Eventually need to enforce this by not letting a thread - // fetch past its skidbuffer - assert(skidBuffer[tid].size() <= skidBufferMax); -} - -template -bool -DefaultDecode::skidsEmpty() -{ - list::iterator threads = activeThreads->begin(); - list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - if (!skidBuffer[tid].empty()) - return false; - } - - return true; -} - -template -void -DefaultDecode::updateStatus() -{ - bool any_unblocking = false; - - list::iterator threads = activeThreads->begin(); - list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - - if (decodeStatus[tid] == Unblocking) { - any_unblocking = true; - break; - } - } - - // Decode will have activity if it's unblocking. - if (any_unblocking) { - if (_status == Inactive) { - _status = Active; - - DPRINTF(Activity, "Activating stage.\n"); - - cpu->activateStage(FullO3CPU::DecodeIdx); - } - } else { - // If it's not unblocking, then decode will not have any internal - // activity. Switch it to inactive. - if (_status == Active) { - _status = Inactive; - DPRINTF(Activity, "Deactivating stage.\n"); - - cpu->deactivateStage(FullO3CPU::DecodeIdx); - } - } -} - -template -void -DefaultDecode::sortInsts() -{ - int insts_from_fetch = fromFetch->size; - for (int i = 0; i < insts_from_fetch; ++i) { - insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]); - } -} - -template -void -DefaultDecode::readStallSignals(ThreadID tid) -{ - if (fromRename->renameBlock[tid]) { - stalls[tid].rename = true; - } - - if (fromRename->renameUnblock[tid]) { - assert(stalls[tid].rename); - stalls[tid].rename = false; - } -} - -template -bool -DefaultDecode::checkSignalsAndUpdate(ThreadID tid) -{ - // Check if there's a squash signal, squash if there is. - // Check stall signals, block if necessary. - // If status was blocked - // Check if stall conditions have passed - // if so then go to unblocking - // If status was Squashing - // check if squashing is not high. Switch to running this cycle. - - // Update the per thread stall statuses. - readStallSignals(tid); - - // Check squash signals from commit. - if (fromCommit->commitInfo[tid].squash) { - - DPRINTF(Decode, "[tid:%i] Squashing instructions due to squash " - "from commit.\n", tid); - - squash(tid); - - return true; - } - - if (checkStall(tid)) { - return block(tid); - } - - if (decodeStatus[tid] == Blocked) { - DPRINTF(Decode, "[tid:%i] Done blocking, switching to unblocking.\n", - tid); - - decodeStatus[tid] = Unblocking; - - unblock(tid); - - return true; - } - - if (decodeStatus[tid] == Squashing) { - // Switch status to running if decode isn't being told to block or - // squash this cycle. - DPRINTF(Decode, "[tid:%i] Done squashing, switching to running.\n", - tid); - - decodeStatus[tid] = Running; - - return false; - } - - // If we've reached this point, we have not gotten any signals that - // cause decode to change its status. Decode remains the same as before. - return false; -} - -template -void -DefaultDecode::tick() -{ - wroteToTimeBuffer = false; - - bool status_change = false; - - toRenameIndex = 0; - - list::iterator threads = activeThreads->begin(); - list::iterator end = activeThreads->end(); - - sortInsts(); - - //Check stall and squash signals. - while (threads != end) { - ThreadID tid = *threads++; - - DPRINTF(Decode,"Processing [tid:%i]\n",tid); - status_change = checkSignalsAndUpdate(tid) || status_change; - - decode(status_change, tid); - } - - if (status_change) { - updateStatus(); - } - - if (wroteToTimeBuffer) { - DPRINTF(Activity, "Activity this cycle.\n"); - - cpu->activityThisCycle(); - } -} - -template -void -DefaultDecode::decode(bool &status_change, ThreadID tid) -{ - // If status is Running or idle, - // call decodeInsts() - // If status is Unblocking, - // buffer any instructions coming from fetch - // continue trying to empty skid buffer - // check if stall conditions have passed - - if (decodeStatus[tid] == Blocked) { - ++stats.blockedCycles; - } else if (decodeStatus[tid] == Squashing) { - ++stats.squashCycles; - } - - // Decode should try to decode as many instructions as its bandwidth - // will allow, as long as it is not currently blocked. - if (decodeStatus[tid] == Running || - decodeStatus[tid] == Idle) { - DPRINTF(Decode, "[tid:%i] Not blocked, so attempting to run " - "stage.\n",tid); - - decodeInsts(tid); - } else if (decodeStatus[tid] == Unblocking) { - // Make sure that the skid buffer has something in it if the - // status is unblocking. - assert(!skidsEmpty()); - - // If the status was unblocking, then instructions from the skid - // buffer were used. Remove those instructions and handle - // the rest of unblocking. - decodeInsts(tid); - - if (fetchInstsValid()) { - // Add the current inputs to the skid buffer so they can be - // reprocessed when this stage unblocks. - skidInsert(tid); - } - - status_change = unblock(tid) || status_change; - } -} - -template -void -DefaultDecode::decodeInsts(ThreadID tid) -{ - // Instructions can come either from the skid buffer or the list of - // instructions coming from fetch, depending on decode's status. - int insts_available = decodeStatus[tid] == Unblocking ? - skidBuffer[tid].size() : insts[tid].size(); - - if (insts_available == 0) { - DPRINTF(Decode, "[tid:%i] Nothing to do, breaking out" - " early.\n",tid); - // Should I change the status to idle? - ++stats.idleCycles; - return; - } else if (decodeStatus[tid] == Unblocking) { - DPRINTF(Decode, "[tid:%i] Unblocking, removing insts from skid " - "buffer.\n",tid); - ++stats.unblockCycles; - } else if (decodeStatus[tid] == Running) { - ++stats.runCycles; - } - - std::queue - &insts_to_decode = decodeStatus[tid] == Unblocking ? - skidBuffer[tid] : insts[tid]; - - DPRINTF(Decode, "[tid:%i] Sending instruction to rename.\n",tid); - - while (insts_available > 0 && toRenameIndex < decodeWidth) { - assert(!insts_to_decode.empty()); - - O3DynInstPtr inst = std::move(insts_to_decode.front()); - - insts_to_decode.pop(); - - DPRINTF(Decode, "[tid:%i] Processing instruction [sn:%lli] with " - "PC %s\n", tid, inst->seqNum, inst->pcState()); - - if (inst->isSquashed()) { - DPRINTF(Decode, "[tid:%i] Instruction %i with PC %s is " - "squashed, skipping.\n", - tid, inst->seqNum, inst->pcState()); - - ++stats.squashedInsts; - - --insts_available; - - continue; - } - - // Also check if instructions have no source registers. Mark - // them as ready to issue at any time. Not sure if this check - // should exist here or at a later stage; however it doesn't matter - // too much for function correctness. - if (inst->numSrcRegs() == 0) { - inst->setCanIssue(); - } - - // This current instruction is valid, so add it into the decode - // queue. The next instruction may not be valid, so check to - // see if branches were predicted correctly. - toRename->insts[toRenameIndex] = inst; - - ++(toRename->size); - ++toRenameIndex; - ++stats.decodedInsts; - --insts_available; - -#if TRACING_ON - if (Debug::O3PipeView) { - inst->decodeTick = curTick() - inst->fetchTick; - } -#endif - - // Ensure that if it was predicted as a branch, it really is a - // branch. - if (inst->readPredTaken() && !inst->isControl()) { - panic("Instruction predicted as a branch!"); - - ++stats.controlMispred; - - // Might want to set some sort of boolean and just do - // a check at the end - squash(inst, inst->threadNumber); - - break; - } - - // Go ahead and compute any PC-relative branches. - // This includes direct unconditional control and - // direct conditional control that is predicted taken. - if (inst->isDirectCtrl() && - (inst->isUncondCtrl() || inst->readPredTaken())) - { - ++stats.branchResolved; - - if (!(inst->branchTarget() == inst->readPredTarg())) { - ++stats.branchMispred; - - // Might want to set some sort of boolean and just do - // a check at the end - squash(inst, inst->threadNumber); - TheISA::PCState target = inst->branchTarget(); - - DPRINTF(Decode, - "[tid:%i] [sn:%llu] " - "Updating predictions: Wrong predicted target: %s \ - PredPC: %s\n", - tid, inst->seqNum, inst->readPredTarg(), target); - //The micro pc after an instruction level branch should be 0 - inst->setPredTarg(target); - break; - } - } - } - - // If we didn't process all instructions, then we will need to block - // and put all those instructions into the skid buffer. - if (!insts_to_decode.empty()) { - block(tid); - } - - // Record that decode has written to the time buffer for activity - // tracking. - if (toRenameIndex) { - wroteToTimeBuffer = true; - } -} - -#endif//__CPU_O3_DECODE_IMPL_HH__