From f4a33801671511f8f1a761501d11c42e5c916d76 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabe.black@gmail.com>
Date: Wed, 3 Mar 2021 03:12:06 -0800
Subject: [PATCH] cpu: De-templatize the O3 DefaultFetch.

Change-Id: I5d4ce7a269c9f1df497003404872a977e7edb575
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42109
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 src/cpu/o3/cpu.hh        |    2 +-
 src/cpu/o3/fetch.cc      | 1598 +++++++++++++++++++++++++++++++++++-
 src/cpu/o3/fetch.hh      |   69 +-
 src/cpu/o3/fetch_impl.hh | 1664 --------------------------------------
 4 files changed, 1622 insertions(+), 1711 deletions(-)
 delete mode 100644 src/cpu/o3/fetch_impl.hh
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 2b220024d8..fdbd1fffe6 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -491,7 +491,7 @@ class FullO3CPU : public BaseO3CPU
 
   protected:
     /** The fetch stage. */
-    DefaultFetch<Impl> fetch;
+    DefaultFetch fetch;
 
     /** The decode stage. */
     DefaultDecode<Impl> decode;
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index f027b92847..fe2b416025 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -1,5 +1,18 @@
 /*
- * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * Copyright (c) 2010-2014 ARM Limited
+ * Copyright (c) 2012-2013 AMD
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +39,1584 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "cpu/o3/fetch_impl.hh"
-#include "cpu/o3/isa_specific.hh"
+#include "cpu/o3/fetch.hh"
 
-template class DefaultFetch<O3CPUImpl>;
+#include <algorithm>
+#include <cstring>
+#include <list>
+#include <map>
+#include <queue>
+
+#include "arch/generic/tlb.hh"
+#include "base/random.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "cpu/base.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/nop_static_inst.hh"
+#include "cpu/o3/cpu.hh"
+#include "cpu/o3/isa_specific.hh"
+#include "cpu/o3/limits.hh"
+#include "debug/Activity.hh"
+#include "debug/Drain.hh"
+#include "debug/Fetch.hh"
+#include "debug/O3CPU.hh"
+#include "debug/O3PipeView.hh"
+#include "mem/packet.hh"
+#include "params/DerivO3CPU.hh"
+#include "sim/byteswap.hh"
+#include "sim/core.hh"
+#include "sim/eventq.hh"
+#include "sim/full_system.hh"
+#include "sim/system.hh"
+
+DefaultFetch::IcachePort::IcachePort(DefaultFetch *_fetch,
+            FullO3CPU<O3CPUImpl>* _cpu) :
+        RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch)
+{}
+
+
+DefaultFetch::DefaultFetch(FullO3CPU<O3CPUImpl> *_cpu,
+        const DerivO3CPUParams &params)
+    : fetchPolicy(params.smtFetchPolicy),
+      cpu(_cpu),
+      branchPred(nullptr),
+      decodeToFetchDelay(params.decodeToFetchDelay),
+      renameToFetchDelay(params.renameToFetchDelay),
+      iewToFetchDelay(params.iewToFetchDelay),
+      commitToFetchDelay(params.commitToFetchDelay),
+      fetchWidth(params.fetchWidth),
+      decodeWidth(params.decodeWidth),
+      retryPkt(NULL),
+      retryTid(InvalidThreadID),
+      cacheBlkSize(cpu->cacheLineSize()),
+      fetchBufferSize(params.fetchBufferSize),
+      fetchBufferMask(fetchBufferSize - 1),
+      fetchQueueSize(params.fetchQueueSize),
+      numThreads(params.numThreads),
+      numFetchingThreads(params.smtNumFetchingThreads),
+      icachePort(this, _cpu),
+      finishTranslationEvent(this), fetchStats(_cpu, this)
+{
+    if (numThreads > O3MaxThreads)
+        fatal("numThreads (%d) is larger than compiled limit (%d),\n"
+              "\tincrease O3MaxThreads in src/cpu/o3/limits.hh\n",
+              numThreads, static_cast<int>(O3MaxThreads));
+    if (fetchWidth > O3MaxWidth)
+        fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
+             "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
+             fetchWidth, static_cast<int>(O3MaxWidth));
+    if (fetchBufferSize > cacheBlkSize)
+        fatal("fetch buffer size (%u bytes) is greater than the cache "
+              "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
+    if (cacheBlkSize % fetchBufferSize)
+        fatal("cache block (%u bytes) is not a multiple of the "
+              "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
+
+    // Get the size of an instruction.
+    instSize = sizeof(TheISA::MachInst);
+
+    for (int i = 0; i < O3MaxThreads; i++) {
+        fetchStatus[i] = Idle;
+        decoder[i] = nullptr;
+        pc[i] = 0;
+        fetchOffset[i] = 0;
+        macroop[i] = nullptr;
+        delayedCommit[i] = false;
+        memReq[i] = nullptr;
+        stalls[i] = {false, false};
+        fetchBuffer[i] = NULL;
+        fetchBufferPC[i] = 0;
+        fetchBufferValid[i] = false;
+        lastIcacheStall[i] = 0;
+        issuePipelinedIfetch[i] = false;
+    }
+
+    branchPred = params.branchPred;
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        decoder[tid] = new TheISA::Decoder(
+                dynamic_cast<TheISA::ISA *>(params.isa[tid]));
+        // Create space to buffer the cache line data,
+        // which may not hold the entire cache line.
+        fetchBuffer[tid] = new uint8_t[fetchBufferSize];
+    }
+}
+
+std::string DefaultFetch::name() const { return cpu->name() + ".fetch"; }
+
+void
+DefaultFetch::regProbePoints()
+{
+    ppFetch = new ProbePointArg<O3DynInstPtr>(cpu->getProbeManager(), "Fetch");
+    ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
+                                                       "FetchRequest");
+
+}
+
+DefaultFetch::FetchStatGroup::FetchStatGroup(
+        FullO3CPU<O3CPUImpl> *cpu, DefaultFetch *fetch)
+    : Stats::Group(cpu, "fetch"),
+    ADD_STAT(icacheStallCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch is stalled on an Icache miss"),
+    ADD_STAT(insts, Stats::Units::Count::get(),
+             "Number of instructions fetch has processed"),
+    ADD_STAT(branches, Stats::Units::Count::get(),
+             "Number of branches that fetch encountered"),
+    ADD_STAT(predictedBranches, Stats::Units::Count::get(),
+             "Number of branches that fetch has predicted taken"),
+    ADD_STAT(cycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has run and was not squashing or "
+             "blocked"),
+    ADD_STAT(squashCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has spent squashing"),
+    ADD_STAT(tlbCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has spent waiting for tlb"),
+    ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch was idle"),
+    ADD_STAT(blockedCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has spent blocked"),
+    ADD_STAT(miscStallCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has spent waiting on interrupts, or bad "
+             "addresses, or out of MSHRs"),
+    ADD_STAT(pendingDrainCycles, Stats::Units::Cycle::get(),
+             "Number of cycles fetch has spent waiting on pipes to drain"),
+    ADD_STAT(noActiveThreadStallCycles, Stats::Units::Cycle::get(),
+             "Number of stall cycles due to no active thread to fetch from"),
+    ADD_STAT(pendingTrapStallCycles, Stats::Units::Cycle::get(),
+             "Number of stall cycles due to pending traps"),
+    ADD_STAT(pendingQuiesceStallCycles, Stats::Units::Cycle::get(),
+             "Number of stall cycles due to pending quiesce instructions"),
+    ADD_STAT(icacheWaitRetryStallCycles, Stats::Units::Cycle::get(),
+             "Number of stall cycles due to full MSHR"),
+    ADD_STAT(cacheLines, Stats::Units::Count::get(),
+             "Number of cache lines fetched"),
+    ADD_STAT(icacheSquashes, Stats::Units::Count::get(),
+             "Number of outstanding Icache misses that were squashed"),
+    ADD_STAT(tlbSquashes, Stats::Units::Count::get(),
+             "Number of outstanding ITLB misses that were squashed"),
+    ADD_STAT(nisnDist, Stats::Units::Count::get(),
+             "Number of instructions fetched each cycle (Total)"),
+    ADD_STAT(idleRate, Stats::Units::Ratio::get(),
+             "Ratio of cycles fetch was idle",
+             idleCycles / cpu->baseStats.numCycles),
+    ADD_STAT(branchRate, Stats::Units::Ratio::get(),
+             "Number of branch fetches per cycle",
+             branches / cpu->baseStats.numCycles),
+    ADD_STAT(rate, Stats::Units::Rate<
+                    Stats::Units::Count, Stats::Units::Cycle>::get(),
+             "Number of inst fetches per cycle",
+             insts / cpu->baseStats.numCycles)
+{
+        icacheStallCycles
+            .prereq(icacheStallCycles);
+        insts
+            .prereq(insts);
+        branches
+            .prereq(branches);
+        predictedBranches
+            .prereq(predictedBranches);
+        cycles
+            .prereq(cycles);
+        squashCycles
+            .prereq(squashCycles);
+        tlbCycles
+            .prereq(tlbCycles);
+        idleCycles
+            .prereq(idleCycles);
+        blockedCycles
+            .prereq(blockedCycles);
+        cacheLines
+            .prereq(cacheLines);
+        miscStallCycles
+            .prereq(miscStallCycles);
+        pendingDrainCycles
+            .prereq(pendingDrainCycles);
+        noActiveThreadStallCycles
+            .prereq(noActiveThreadStallCycles);
+        pendingTrapStallCycles
+            .prereq(pendingTrapStallCycles);
+        pendingQuiesceStallCycles
+            .prereq(pendingQuiesceStallCycles);
+        icacheWaitRetryStallCycles
+            .prereq(icacheWaitRetryStallCycles);
+        icacheSquashes
+            .prereq(icacheSquashes);
+        tlbSquashes
+            .prereq(tlbSquashes);
+        nisnDist
+            .init(/* base value */ 0,
+              /* last value */ fetch->fetchWidth,
+              /* bucket size */ 1)
+            .flags(Stats::pdf);
+        idleRate
+            .prereq(idleRate);
+        branchRate
+            .flags(Stats::total);
+        rate
+            .flags(Stats::total);
+}
+void
+DefaultFetch::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *time_buffer)
+{
+    timeBuffer = time_buffer;
+
+    // Create wires to get information from proper places in time buffer.
+    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
+    fromRename = timeBuffer->getWire(-renameToFetchDelay);
+    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
+    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
+}
+
+void
+DefaultFetch::setActiveThreads(std::list<ThreadID> *at_ptr)
+{
+    activeThreads = at_ptr;
+}
+
+void
+DefaultFetch::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *ftb_ptr)
+{
+    // Create wire to write information to proper place in fetch time buf.
+    toDecode = ftb_ptr->getWire(0);
+}
+
+void
+DefaultFetch::startupStage()
+{
+    assert(priorityList.empty());
+    resetStage();
+
+    // Fetch needs to start fetching instructions at the very beginning,
+    // so it must start up in active state.
+    switchToActive();
+}
+
+void
+DefaultFetch::clearStates(ThreadID tid)
+{
+    fetchStatus[tid] = Running;
+    pc[tid] = cpu->pcState(tid);
+    fetchOffset[tid] = 0;
+    macroop[tid] = NULL;
+    delayedCommit[tid] = false;
+    memReq[tid] = NULL;
+    stalls[tid].decode = false;
+    stalls[tid].drain = false;
+    fetchBufferPC[tid] = 0;
+    fetchBufferValid[tid] = false;
+    fetchQueue[tid].clear();
+
+    // TODO not sure what to do with priorityList for now
+    // priorityList.push_back(tid);
+}
+
+void
+DefaultFetch::resetStage()
+{
+    numInst = 0;
+    interruptPending = false;
+    cacheBlocked = false;
+
+    priorityList.clear();
+
+    // Setup PC and nextPC with initial state.
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        fetchStatus[tid] = Running;
+        pc[tid] = cpu->pcState(tid);
+        fetchOffset[tid] = 0;
+        macroop[tid] = NULL;
+
+        delayedCommit[tid] = false;
+        memReq[tid] = NULL;
+
+        stalls[tid].decode = false;
+        stalls[tid].drain = false;
+
+        fetchBufferPC[tid] = 0;
+        fetchBufferValid[tid] = false;
+
+        fetchQueue[tid].clear();
+
+        priorityList.push_back(tid);
+    }
+
+    wroteToTimeBuffer = false;
+    _status = Inactive;
+}
+
+void
+DefaultFetch::processCacheCompletion(PacketPtr pkt)
+{
+    ThreadID tid = cpu->contextToThread(pkt->req->contextId());
+
+    DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
+    assert(!cpu->switchedOut());
+
+    // Only change the status if it's still waiting on the icache access
+    // to return.
+    if (fetchStatus[tid] != IcacheWaitResponse ||
+        pkt->req != memReq[tid]) {
+        ++fetchStats.icacheSquashes;
+        delete pkt;
+        return;
+    }
+
+    memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
+    fetchBufferValid[tid] = true;
+
+    // Wake up the CPU (if it went to sleep and was waiting on
+    // this completion event).
+    cpu->wakeCPU();
+
+    DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
+            tid);
+
+    switchToActive();
+
+    // Only switch to IcacheAccessComplete if we're not stalled as well.
+    if (checkStall(tid)) {
+        fetchStatus[tid] = Blocked;
+    } else {
+        fetchStatus[tid] = IcacheAccessComplete;
+    }
+
+    pkt->req->setAccessLatency();
+    cpu->ppInstAccessComplete->notify(pkt);
+    // Reset the mem req to NULL.
+    delete pkt;
+    memReq[tid] = NULL;
+}
+
+void
+DefaultFetch::drainResume()
+{
+    for (ThreadID i = 0; i < numThreads; ++i) {
+        stalls[i].decode = false;
+        stalls[i].drain = false;
+    }
+}
+
+void
+DefaultFetch::drainSanityCheck() const
+{
+    assert(isDrained());
+    assert(retryPkt == NULL);
+    assert(retryTid == InvalidThreadID);
+    assert(!cacheBlocked);
+    assert(!interruptPending);
+
+    for (ThreadID i = 0; i < numThreads; ++i) {
+        assert(!memReq[i]);
+        assert(fetchStatus[i] == Idle || stalls[i].drain);
+    }
+
+    branchPred->drainSanityCheck();
+}
+
+bool
+DefaultFetch::isDrained() const
+{
+    /* Make sure that threads are either idle or that the commit stage
+     * has signaled that draining has completed by setting the drain
+     * stall flag. This effectively forces the pipeline to be disabled
+     * until the whole system is drained (simulation may continue to
+     * drain other components).
+     */
+    for (ThreadID i = 0; i < numThreads; ++i) {
+        // Verify fetch queues are drained
+        if (!fetchQueue[i].empty())
+            return false;
+
+        // Return false if not idle or drain stalled
+        if (fetchStatus[i] != Idle) {
+            if (fetchStatus[i] == Blocked && stalls[i].drain)
+                continue;
+            else
+                return false;
+        }
+    }
+
+    /* The pipeline might start up again in the middle of the drain
+     * cycle if the finish translation event is scheduled, so make
+     * sure that's not the case.
+     */
+    return !finishTranslationEvent.scheduled();
+}
+
+void
+DefaultFetch::takeOverFrom()
+{
+    assert(cpu->getInstPort().isConnected());
+    resetStage();
+
+}
+
+void
+DefaultFetch::drainStall(ThreadID tid)
+{
+    assert(cpu->isDraining());
+    assert(!stalls[tid].drain);
+    DPRINTF(Drain, "%i: Thread drained.\n", tid);
+    stalls[tid].drain = true;
+}
+
+void
+DefaultFetch::wakeFromQuiesce()
+{
+    DPRINTF(Fetch, "Waking up from quiesce\n");
+    // Hopefully this is safe
+    // @todo: Allow other threads to wake from quiesce.
+    fetchStatus[0] = Running;
+}
+
+void
+DefaultFetch::switchToActive()
+{
+    if (_status == Inactive) {
+        DPRINTF(Activity, "Activating stage.\n");
+
+        cpu->activateStage(FullO3CPU<O3CPUImpl>::FetchIdx);
+
+        _status = Active;
+    }
+}
+
+void
+DefaultFetch::switchToInactive()
+{
+    if (_status == Active) {
+        DPRINTF(Activity, "Deactivating stage.\n");
+
+        cpu->deactivateStage(FullO3CPU<O3CPUImpl>::FetchIdx);
+
+        _status = Inactive;
+    }
+}
+
+void
+DefaultFetch::deactivateThread(ThreadID tid)
+{
+    // Update priority list
+    auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
+    if (thread_it != priorityList.end()) {
+        priorityList.erase(thread_it);
+    }
+}
+
+bool
+DefaultFetch::lookupAndUpdateNextPC(const O3DynInstPtr &inst,
+        TheISA::PCState &nextPC)
+{
+    // Do branch prediction check here.
+    // A bit of a misnomer...nextPC is actually the current PC until
+    // this function updates it.
+    bool predict_taken;
+
+    if (!inst->isControl()) {
+        inst->staticInst->advancePC(nextPC);
+        inst->setPredTarg(nextPC);
+        inst->setPredTaken(false);
+        return false;
+    }
+
+    ThreadID tid = inst->threadNumber;
+    predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
+                                        nextPC, tid);
+
+    if (predict_taken) {
+        DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
+                "predicted to be taken to %s\n",
+                tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
+    } else {
+        DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
+                "predicted to be not taken\n",
+                tid, inst->seqNum, inst->pcState().instAddr());
+    }
+
+    DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
+            "predicted to go to %s\n",
+            tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
+    inst->setPredTarg(nextPC);
+    inst->setPredTaken(predict_taken);
+
+    ++fetchStats.branches;
+
+    if (predict_taken) {
+        ++fetchStats.predictedBranches;
+    }
+
+    return predict_taken;
+}
+
+bool
+DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
+{
+    Fault fault = NoFault;
+
+    assert(!cpu->switchedOut());
+
+    // @todo: not sure if these should block translation.
+    //AlphaDep
+    if (cacheBlocked) {
+        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
+                tid);
+        return false;
+    } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
+        // Hold off fetch from getting new instructions when:
+        // Cache is blocked, or
+        // while an interrupt is pending and we're not in PAL mode, or
+        // fetch is switched out.
+        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
+                tid);
+        return false;
+    }
+
+    // Align the fetch address to the start of a fetch buffer segment.
+    Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
+
+    DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
+            tid, fetchBufferBlockPC, vaddr);
+
+    // Setup the memReq to do a read of the first instruction's address.
+    // Set the appropriate read size and flags as well.
+    // Build request here.
+    RequestPtr mem_req = std::make_shared<Request>(
+        fetchBufferBlockPC, fetchBufferSize,
+        Request::INST_FETCH, cpu->instRequestorId(), pc,
+        cpu->thread[tid]->contextId());
+
+    mem_req->taskId(cpu->taskId());
+
+    memReq[tid] = mem_req;
+
+    // Initiate translation of the icache block
+    fetchStatus[tid] = ItlbWait;
+    FetchTranslation *trans = new FetchTranslation(this);
+    cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
+                              trans, BaseTLB::Execute);
+    return true;
+}
+
+void
+DefaultFetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req)
+{
+    ThreadID tid = cpu->contextToThread(mem_req->contextId());
+    Addr fetchBufferBlockPC = mem_req->getVaddr();
+
+    assert(!cpu->switchedOut());
+
+    // Wake up CPU if it was idle
+    cpu->wakeCPU();
+
+    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
+        mem_req->getVaddr() != memReq[tid]->getVaddr()) {
+        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
+                tid);
+        ++fetchStats.tlbSquashes;
+        return;
+    }
+
+
+    // If translation was successful, attempt to read the icache block.
+    if (fault == NoFault) {
+        // Check that we're not going off into random memory
+        // If we have, just wait around for commit to squash something and put
+        // us on the right track
+        if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
+            warn("Address %#x is outside of physical memory, stopping fetch\n",
+                    mem_req->getPaddr());
+            fetchStatus[tid] = NoGoodAddr;
+            memReq[tid] = NULL;
+            return;
+        }
+
+        // Build packet here.
+        PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
+        data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
+
+        fetchBufferPC[tid] = fetchBufferBlockPC;
+        fetchBufferValid[tid] = false;
+        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
+
+        fetchStats.cacheLines++;
+
+        // Access the cache.
+        if (!icachePort.sendTimingReq(data_pkt)) {
+            assert(retryPkt == NULL);
+            assert(retryTid == InvalidThreadID);
+            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
+
+            fetchStatus[tid] = IcacheWaitRetry;
+            retryPkt = data_pkt;
+            retryTid = tid;
+            cacheBlocked = true;
+        } else {
+            DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
+            DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
+                    "response.\n", tid);
+            lastIcacheStall[tid] = curTick();
+            fetchStatus[tid] = IcacheWaitResponse;
+            // Notify Fetch Request probe when a packet containing a fetch
+            // request is successfully sent
+            ppFetchRequestSent->notify(mem_req);
+        }
+    } else {
+        // Don't send an instruction to decode if we can't handle it.
+        if (!(numInst < fetchWidth) ||
+                !(fetchQueue[tid].size() < fetchQueueSize)) {
+            assert(!finishTranslationEvent.scheduled());
+            finishTranslationEvent.setFault(fault);
+            finishTranslationEvent.setReq(mem_req);
+            cpu->schedule(finishTranslationEvent,
+                          cpu->clockEdge(Cycles(1)));
+            return;
+        }
+        DPRINTF(Fetch,
+                "[tid:%i] Got back req with addr %#x but expected %#x\n",
+                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
+        // Translation faulted, icache request won't be sent.
+        memReq[tid] = NULL;
+
+        // Send the fault to commit.  This thread will not do anything
+        // until commit handles the fault.  The only other way it can
+        // wake up is if a squash comes along and changes the PC.
+        TheISA::PCState fetchPC = pc[tid];
+
+        DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
+        // We will use a nop in order to carry the fault.
+        O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
+                fetchPC, fetchPC, false);
+        instruction->setNotAnInst();
+
+        instruction->setPredTarg(fetchPC);
+        instruction->fault = fault;
+        wroteToTimeBuffer = true;
+
+        DPRINTF(Activity, "Activity this cycle.\n");
+        cpu->activityThisCycle();
+
+        fetchStatus[tid] = TrapPending;
+
+        DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
+        DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
+                tid, fault->name(), pc[tid]);
+    }
+    _status = updateFetchStatus();
+}
+
+void
+DefaultFetch::doSquash(const TheISA::PCState &newPC,
+        const O3DynInstPtr squashInst, ThreadID tid)
+{
+    DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
+            tid, newPC);
+
+    pc[tid] = newPC;
+    fetchOffset[tid] = 0;
+    if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
+        macroop[tid] = squashInst->macroop;
+    else
+        macroop[tid] = NULL;
+    decoder[tid]->reset();
+
+    // Clear the icache miss if it's outstanding.
+    if (fetchStatus[tid] == IcacheWaitResponse) {
+        DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
+                tid);
+        memReq[tid] = NULL;
+    } else if (fetchStatus[tid] == ItlbWait) {
+        DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
+                tid);
+        memReq[tid] = NULL;
+    }
+
+    // Get rid of the retrying packet if it was from this thread.
+    if (retryTid == tid) {
+        assert(cacheBlocked);
+        if (retryPkt) {
+            delete retryPkt;
+        }
+        retryPkt = NULL;
+        retryTid = InvalidThreadID;
+    }
+
+    fetchStatus[tid] = Squashing;
+
+    // Empty fetch queue
+    fetchQueue[tid].clear();
+
+    // microops are being squashed, it is not known whether the
+    // youngest non-squashed microop was marked delayed commit
+    // or not. Setting the flag to true ensures that the
+    // interrupts are not handled when they cannot be, though
+    // some opportunities to handle interrupts may be missed.
+    delayedCommit[tid] = true;
+
+    ++fetchStats.squashCycles;
+}
+
+void
+DefaultFetch::squashFromDecode(const TheISA::PCState &newPC,
+        const O3DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid)
+{
+    DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
+
+    doSquash(newPC, squashInst, tid);
+
+    // Tell the CPU to remove any instructions that are in flight between
+    // fetch and decode.
+    cpu->removeInstsUntil(seq_num, tid);
+}
+
+bool
+DefaultFetch::checkStall(ThreadID tid) const
+{
+    bool ret_val = false;
+
+    if (stalls[tid].drain) {
+        assert(cpu->isDraining());
+        DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
+        ret_val = true;
+    }
+
+    return ret_val;
+}
+
+DefaultFetch::FetchStatus
+DefaultFetch::updateFetchStatus()
+{
+    // Active if any thread is Running, Squashing or IcacheAccessComplete.
+    std::list<ThreadID>::iterator threads = activeThreads->begin();
+    std::list<ThreadID>::iterator end = activeThreads->end();
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        if (fetchStatus[tid] == Running ||
+            fetchStatus[tid] == Squashing ||
+            fetchStatus[tid] == IcacheAccessComplete) {
+
+            if (_status == Inactive) {
+                DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
+
+                if (fetchStatus[tid] == IcacheAccessComplete) {
+                    DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
+                            " completion\n",tid);
+                }
+
+                cpu->activateStage(FullO3CPU<O3CPUImpl>::FetchIdx);
+            }
+
+            return Active;
+        }
+    }
+
+    // Stage is switching from active to inactive, notify CPU of it.
+    if (_status == Active) {
+        DPRINTF(Activity, "Deactivating stage.\n");
+
+        cpu->deactivateStage(FullO3CPU<O3CPUImpl>::FetchIdx);
+    }
+
+    return Inactive;
+}
+
+void
+DefaultFetch::squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
+        O3DynInstPtr squashInst, ThreadID tid)
+{
+    DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
+
+    doSquash(newPC, squashInst, tid);
+
+    // Tell the CPU to remove any instructions that are not in the ROB.
+    cpu->removeInstsNotInROB(tid);
+}
+
+void
+DefaultFetch::tick()
+{
+    std::list<ThreadID>::iterator threads = activeThreads->begin();
+    std::list<ThreadID>::iterator end = activeThreads->end();
+    bool status_change = false;
+
+    wroteToTimeBuffer = false;
+
+    for (ThreadID i = 0; i < numThreads; ++i) {
+        issuePipelinedIfetch[i] = false;
+    }
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        // Check the signals for each thread to determine the proper status
+        // for each thread.
+        bool updated_status = checkSignalsAndUpdate(tid);
+        status_change =  status_change || updated_status;
+    }
+
+    DPRINTF(Fetch, "Running stage.\n");
+
+    if (FullSystem) {
+        if (fromCommit->commitInfo[0].interruptPending) {
+            interruptPending = true;
+        }
+
+        if (fromCommit->commitInfo[0].clearInterrupt) {
+            interruptPending = false;
+        }
+    }
+
+    for (threadFetched = 0; threadFetched < numFetchingThreads;
+         threadFetched++) {
+        // Fetch each of the actively fetching threads.
+        fetch(status_change);
+    }
+
+    // Record number of instructions fetched this cycle for distribution.
+    fetchStats.nisnDist.sample(numInst);
+
+    if (status_change) {
+        // Change the fetch stage status if there was a status change.
+        _status = updateFetchStatus();
+    }
+
+    // Issue the next I-cache request if possible.
+    for (ThreadID i = 0; i < numThreads; ++i) {
+        if (issuePipelinedIfetch[i]) {
+            pipelineIcacheAccesses(i);
+        }
+    }
+
+    // Send instructions enqueued into the fetch queue to decode.
+    // Limit rate by fetchWidth.  Stall if decode is stalled.
+    unsigned insts_to_decode = 0;
+    unsigned available_insts = 0;
+
+    for (auto tid : *activeThreads) {
+        if (!stalls[tid].decode) {
+            available_insts += fetchQueue[tid].size();
+        }
+    }
+
+    // Pick a random thread to start trying to grab instructions from
+    auto tid_itr = activeThreads->begin();
+    std::advance(tid_itr,
+            random_mt.random<uint8_t>(0, activeThreads->size() - 1));
+
+    while (available_insts != 0 && insts_to_decode < decodeWidth) {
+        ThreadID tid = *tid_itr;
+        if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
+            const auto& inst = fetchQueue[tid].front();
+            toDecode->insts[toDecode->size++] = inst;
+            DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
+                    "from fetch queue. Fetch queue size: %i.\n",
+                    tid, inst->seqNum, fetchQueue[tid].size());
+
+            wroteToTimeBuffer = true;
+            fetchQueue[tid].pop_front();
+            insts_to_decode++;
+            available_insts--;
+        }
+
+        tid_itr++;
+        // Wrap around if at end of active threads list
+        if (tid_itr == activeThreads->end())
+            tid_itr = activeThreads->begin();
+    }
+
+    // If there was activity this cycle, inform the CPU of it.
+    if (wroteToTimeBuffer) {
+        DPRINTF(Activity, "Activity this cycle.\n");
+        cpu->activityThisCycle();
+    }
+
+    // Reset the number of the instruction we've fetched.
+    numInst = 0;
+}
+
+bool
+DefaultFetch::checkSignalsAndUpdate(ThreadID tid)
+{
+    // React to decode/commit signals for tid; returns true on a status change.
+    if (fromDecode->decodeBlock[tid]) {
+        stalls[tid].decode = true;
+    }
+    // An unblock must target an existing stall and never accompany a block.
+    if (fromDecode->decodeUnblock[tid]) {
+        assert(stalls[tid].decode);
+        assert(!fromDecode->decodeBlock[tid]);
+        stalls[tid].decode = false;
+    }
+
+    // Check squash signals from commit.
+    if (fromCommit->commitInfo[tid].squash) {
+
+        DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
+                "from commit.\n",tid);
+        // In any case, squash.
+        squash(fromCommit->commitInfo[tid].pc,
+               fromCommit->commitInfo[tid].doneSeqNum,
+               fromCommit->commitInfo[tid].squashInst, tid);
+
+        // If it was a branch mispredict on a control instruction, update the
+        // branch predictor with that instruction, otherwise just kill the
+        // invalid state we generated after the given sequence number.
+        if (fromCommit->commitInfo[tid].mispredictInst &&
+            fromCommit->commitInfo[tid].mispredictInst->isControl()) {
+            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
+                              fromCommit->commitInfo[tid].pc,
+                              fromCommit->commitInfo[tid].branchTaken,
+                              tid);
+        } else {
+            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
+                              tid);
+        }
+
+        return true;
+    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
+        // Update the branch predictor if it wasn't a squashed instruction
+        // that was broadcasted.
+        branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
+    }
+
+    // Check squash signals from decode.
+    if (fromDecode->decodeInfo[tid].squash) {
+        DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
+                "from decode.\n",tid);
+
+        // Update the branch predictor.
+        if (fromDecode->decodeInfo[tid].branchMispredict) {
+            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
+                              fromDecode->decodeInfo[tid].nextPC,
+                              fromDecode->decodeInfo[tid].branchTaken,
+                              tid);
+        } else {
+            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
+                              tid);
+        }
+
+        if (fetchStatus[tid] != Squashing) {
+
+            DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
+                fromDecode->decodeInfo[tid].nextPC);
+            // Squash unless we're already squashing
+            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
+                             fromDecode->decodeInfo[tid].squashInst,
+                             fromDecode->decodeInfo[tid].doneSeqNum,
+                             tid);
+
+            return true;
+        }
+    }
+
+    if (checkStall(tid) &&
+        fetchStatus[tid] != IcacheWaitResponse &&
+        fetchStatus[tid] != IcacheWaitRetry &&
+        fetchStatus[tid] != ItlbWait &&
+        fetchStatus[tid] != QuiescePending) {
+        DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
+
+        fetchStatus[tid] = Blocked;
+
+        return true;
+    }
+
+    if (fetchStatus[tid] == Blocked ||
+        fetchStatus[tid] == Squashing) {
+        // Switch status to running if fetch isn't being told to block or
+        // squash this cycle.
+        DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
+                tid);
+
+        fetchStatus[tid] = Running;
+
+        return true;
+    }
+
+    // If we've reached this point, we have not gotten any signals that
+    // cause fetch to change its status.  Fetch remains the same as before.
+    return false;
+}
+
+O3DynInstPtr
+DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
+        StaticInstPtr curMacroop, TheISA::PCState thisPC,
+        TheISA::PCState nextPC, bool trace)
+{
+    // Build a DynInst for staticInst, register it with the CPU and queue it.
+    InstSeqNum seq = cpu->getAndIncrementInstSeq();
+
+    // Create a new DynInst from the instruction fetched.
+    O3DynInstPtr instruction =
+        new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
+    instruction->setTid(tid);
+
+    instruction->setThreadState(cpu->thread[tid]);
+
+    DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created "
+            "[sn:%lli].\n", tid, thisPC.instAddr(),
+            thisPC.microPC(), seq);
+
+    DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
+            instruction->staticInst->
+            disassemble(thisPC.instAddr()));
+    // A trace record is requested only if tracing is built in and asked for.
+#if TRACING_ON
+    if (trace) {
+        instruction->traceData =
+            cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
+                    instruction->staticInst, thisPC, curMacroop);
+    }
+#else
+    instruction->traceData = NULL;
+#endif
+
+    // Add instruction to the CPU's list of instructions.
+    instruction->setInstListIt(cpu->addInst(instruction));
+
+    // Append the instruction to the back of this thread's fetch queue;
+    // it is handed on to decode from there, not written to toDecode here.
+    assert(numInst < fetchWidth);
+    fetchQueue[tid].push_back(instruction);
+    assert(fetchQueue[tid].size() <= fetchQueueSize);
+    DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
+            tid, fetchQueue[tid].size(), fetchQueueSize);
+    //toDecode->insts[toDecode->size++] = instruction;
+
+    // Keep track of if we can take an interrupt at this boundary
+    delayedCommit[tid] = instruction->isDelayedCommit();
+
+    return instruction;
+}
+
+void
+DefaultFetch::fetch(bool &status_change)
+{
+    // Fetch for one thread: decode instructions from its fetch buffer, the
+    // current macro-op or the microcode ROM into its fetch queue, bounded by
+    // fetchWidth.  status_change is set when fetch() itself changes status.
+    ThreadID tid = getFetchingThread();
+
+    assert(!cpu->switchedOut());
+
+    if (tid == InvalidThreadID) {
+        // Breaks looping condition in tick()
+        threadFetched = numFetchingThreads;
+
+        if (numThreads == 1) {  // @todo Per-thread stats
+            profileStall(0);
+        }
+
+        return;
+    }
+
+    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
+
+    // The current PC.
+    TheISA::PCState thisPC = pc[tid];
+
+    Addr pcOffset = fetchOffset[tid];
+    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+
+    bool inRom = isRomMicroPC(thisPC.microPC());
+
+    // If returning from the delay of a cache miss, then update the status
+    // to running, otherwise do the cache access.  Possibly move this up
+    // to tick() function.
+    if (fetchStatus[tid] == IcacheAccessComplete) {
+        DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
+
+        fetchStatus[tid] = Running;
+        status_change = true;
+    } else if (fetchStatus[tid] == Running) {
+        // Align the fetch PC so its at the start of a fetch buffer segment.
+        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
+
+        // If buffer is no longer valid or fetchAddr has moved to point
+        // to the next cache block, AND we have no remaining ucode
+        // from a macro-op, then start fetch from icache.
+        if (!(fetchBufferValid[tid] &&
+                    fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom &&
+                !macroop[tid]) {
+            DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
+                    "instruction, starting at PC %s.\n", tid, thisPC);
+
+            fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
+
+            if (fetchStatus[tid] == IcacheWaitResponse)
+                ++fetchStats.icacheStallCycles;
+            else if (fetchStatus[tid] == ItlbWait)
+                ++fetchStats.tlbCycles;
+            else
+                ++fetchStats.miscStallCycles;
+            return;
+        } else if (checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid]) {
+            // Stall CPU if an interrupt is posted and we're not issuing
+            // a delayed commit micro-op currently (delayed commit
+            // instructions are not interruptable by interrupts, only faults)
+            ++fetchStats.miscStallCycles;
+            DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
+            return;
+        }
+    } else {
+        if (fetchStatus[tid] == Idle) {
+            ++fetchStats.idleCycles;
+            DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
+        }
+
+        // Neither Running nor just back from a miss: fetch does nothing.
+        return;
+    }
+
+    ++fetchStats.cycles;
+
+    TheISA::PCState nextPC = thisPC;
+
+    StaticInstPtr staticInst = NULL;
+    StaticInstPtr curMacroop = macroop[tid];
+
+    // If the read of the first instruction was successful, then grab the
+    // instructions from the rest of the cache line and put them into the
+    // queue heading to decode.
+
+    DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
+            "decode.\n", tid);
+
+    // Need to keep track of whether or not a predicted branch
+    // ended this fetch block.
+    bool predictedBranch = false;
+
+    // Need to halt fetch if quiesce instruction detected
+    bool quiesce = false;
+
+    TheISA::MachInst *cacheInsts =
+        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
+
+    const unsigned numInsts = fetchBufferSize / instSize;
+    unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
+
+    // Loop through instruction memory from the cache.
+    // Keep issuing while fetchWidth is available and branch is not
+    // predicted taken
+    while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
+           && !predictedBranch && !quiesce) {
+        // We need to process more memory if we aren't going to get a
+        // StaticInst from the rom, the current macroop, or what's already
+        // in the decoder.
+        bool needMem = !inRom && !curMacroop &&
+            !decoder[tid]->instReady();
+        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
+
+        if (needMem) {
+            // If buffer is no longer valid or fetchAddr has moved to point
+            // to the next cache block then start fetch from icache.
+            if (!fetchBufferValid[tid] ||
+                fetchBufferBlockPC != fetchBufferPC[tid])
+                break;
+
+            if (blkOffset >= numInsts) {
+                // We need to process more memory, but we've run out of the
+                // current block.
+                break;
+            }
+
+            decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]);
+
+            if (decoder[tid]->needMoreBytes()) {
+                blkOffset++;
+                fetchAddr += instSize;
+                pcOffset += instSize;
+            }
+        }
+
+        // Extract as many instructions and/or microops as we can from
+        // the memory we've processed so far.
+        do {
+            if (!(curMacroop || inRom)) {
+                if (decoder[tid]->instReady()) {
+                    staticInst = decoder[tid]->decode(thisPC);
+
+                    // Increment stat of fetched instructions.
+                    ++fetchStats.insts;
+
+                    if (staticInst->isMacroop()) {
+                        curMacroop = staticInst;
+                    } else {
+                        pcOffset = 0;
+                    }
+                } else {
+                    // We need more bytes for this instruction so blkOffset and
+                    // pcOffset will be updated
+                    break;
+                }
+            }
+            // Whether we're moving to a new macroop because we're at the
+            // end of the current one, or the branch predictor incorrectly
+            // thinks we are...
+            bool newMacro = false;
+            if (curMacroop || inRom) {
+                if (inRom) {
+                    staticInst = decoder[tid]->fetchRomMicroop(
+                            thisPC.microPC(), curMacroop);
+                } else {
+                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
+                }
+                newMacro |= staticInst->isLastMicroop();
+            }
+
+            O3DynInstPtr instruction =
+                buildInst(tid, staticInst, curMacroop,
+                          thisPC, nextPC, true);
+
+            ppFetch->notify(instruction);
+            numInst++;
+
+#if TRACING_ON
+            if (Debug::O3PipeView) {
+                instruction->fetchTick = curTick();
+            }
+#endif
+
+            nextPC = thisPC;
+
+            // If we're branching after this instruction, quit fetching
+            // from the same block.
+            predictedBranch |= thisPC.branching();
+            predictedBranch |=
+                lookupAndUpdateNextPC(instruction, nextPC);
+            if (predictedBranch) {
+                DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
+            }
+
+            newMacro |= thisPC.instAddr() != nextPC.instAddr();
+
+            // Move to the next instruction, unless we have a branch.
+            thisPC = nextPC;
+            inRom = isRomMicroPC(thisPC.microPC());
+
+            if (newMacro) {
+                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
+                blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
+                pcOffset = 0;
+                curMacroop = NULL;
+            }
+
+            if (instruction->isQuiesce()) {
+                DPRINTF(Fetch,
+                        "Quiesce instruction encountered, halting fetch!\n");
+                fetchStatus[tid] = QuiescePending;
+                status_change = true;
+                quiesce = true;
+                break;
+            }
+        } while ((curMacroop || decoder[tid]->instReady()) &&
+                 numInst < fetchWidth &&
+                 fetchQueue[tid].size() < fetchQueueSize);
+
+        // Re-evaluate whether the next instruction to fetch is in micro-op ROM
+        // or not.
+        inRom = isRomMicroPC(thisPC.microPC());
+    }
+
+    if (predictedBranch) {
+        DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
+                "instruction encountered.\n", tid);
+    } else if (numInst >= fetchWidth) {
+        DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
+                "for this cycle.\n", tid);
+    } else if (blkOffset >= numInsts) {
+        DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the "
+                "fetch buffer.\n", tid);
+    }
+
+    macroop[tid] = curMacroop;
+    fetchOffset[tid] = pcOffset;
+
+    if (numInst > 0) {
+        wroteToTimeBuffer = true;
+    }
+
+    pc[tid] = thisPC;
+
+    // pipeline a fetch if we're crossing a fetch buffer boundary and not in
+    // a state that would preclude fetching
+    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
+    issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
+        fetchStatus[tid] != IcacheWaitResponse &&
+        fetchStatus[tid] != ItlbWait &&
+        fetchStatus[tid] != IcacheWaitRetry &&
+        fetchStatus[tid] != QuiescePending &&
+        !curMacroop;
+}
+
+void
+DefaultFetch::recvReqRetry()
+{
+    if (retryPkt != NULL) {
+        assert(cacheBlocked);
+        assert(retryTid != InvalidThreadID);
+        assert(fetchStatus[retryTid] == IcacheWaitRetry);
+        // Try to resend; if refused again, all retry state is left intact.
+        if (icachePort.sendTimingReq(retryPkt)) {
+            fetchStatus[retryTid] = IcacheWaitResponse;
+            // Notify Fetch Request probe when a retryPkt is successfully sent.
+            // Note that notify must be called before retryPkt is set to NULL.
+            ppFetchRequestSent->notify(retryPkt->req);
+            retryPkt = NULL;
+            retryTid = InvalidThreadID;
+            cacheBlocked = false;
+        }
+    } else {
+        assert(retryTid == InvalidThreadID);
+        // Access has been squashed since it was sent out.  Just clear
+        // the cache being blocked.
+        cacheBlocked = false;
+    }
+}
+
+///////////////////////////////////////
+//                                   //
+//  SMT FETCH POLICY MAINTAINED HERE //
+//                                   //
+///////////////////////////////////////
+ThreadID
+DefaultFetch::getFetchingThread()
+{
+    if (numThreads > 1) {  // SMT: defer to the selected fetch policy
+        switch (fetchPolicy) {
+          case SMTFetchPolicy::RoundRobin:
+            return roundRobin();
+          case SMTFetchPolicy::IQCount:
+            return iqCount();
+          case SMTFetchPolicy::LSQCount:
+            return lsqCount();
+          case SMTFetchPolicy::Branch:
+            return branchCount();
+          default:
+            return InvalidThreadID;
+        }
+    } else {
+        std::list<ThreadID>::iterator thread = activeThreads->begin();
+        if (thread == activeThreads->end()) {
+            return InvalidThreadID;
+        }
+        // With a single thread only the first active thread is considered.
+        ThreadID tid = *thread;
+
+        if (fetchStatus[tid] == Running ||
+            fetchStatus[tid] == IcacheAccessComplete ||
+            fetchStatus[tid] == Idle) {
+            return tid;
+        } else {
+            return InvalidThreadID;
+        }
+    }
+}
+
+
+ThreadID
+DefaultFetch::roundRobin()
+{
+    std::list<ThreadID>::iterator pri_iter = priorityList.begin();
+    std::list<ThreadID>::iterator end      = priorityList.end();
+
+    ThreadID high_pri;
+    // Return the first fetchable thread in list order, then demote it.
+    while (pri_iter != end) {
+        high_pri = *pri_iter;
+        // Thread IDs are zero-based, so numThreads itself is out of range.
+        assert(high_pri < numThreads);
+
+        if (fetchStatus[high_pri] == Running ||
+            fetchStatus[high_pri] == IcacheAccessComplete ||
+            fetchStatus[high_pri] == Idle) {
+            // Rotate the chosen thread to the back of the priority list.
+            priorityList.erase(pri_iter);
+            priorityList.push_back(high_pri);
+
+            return high_pri;
+        }
+
+        pri_iter++;
+    }
+
+    return InvalidThreadID;
+}
+
+ThreadID
+DefaultFetch::iqCount()
+{
+    // SMT IQCount policy: choose the fetchable active thread with the
+    // smallest iqCount reported by IEW (iewInfo[tid].iqCount).
+    //
+    // Threads are compared directly instead of being looked up through a
+    // map keyed by their count.  With such a map, threads reporting equal
+    // counts overwrite each other, so a fetchable thread could be skipped
+    // whenever the thread that shadowed it was not able to fetch.
+
+    ThreadID best_tid = InvalidThreadID;
+    unsigned best_count = 0;
+
+    for (ThreadID tid : *activeThreads) {
+        // Only threads in a status fetch can act on are candidates.
+        if (fetchStatus[tid] != Running &&
+            fetchStatus[tid] != IcacheAccessComplete &&
+            fetchStatus[tid] != Idle) {
+            continue;
+        }
+
+        const unsigned count = fromIEW->iewInfo[tid].iqCount;
+
+        // "<=" lets the thread later in the active list win a tie, which
+        // keeps the tie-break of the count-keyed lookup this replaces.
+        if (best_tid == InvalidThreadID || count <= best_count) {
+            best_tid = tid;
+            best_count = count;
+        }
+    }
+
+    // Still InvalidThreadID if no active thread is able to fetch.
+    return best_tid;
+}
+
+ThreadID
+DefaultFetch::lsqCount()
+{
+    // SMT LSQCount policy: choose the fetchable active thread with the
+    // smallest ldstqCount reported by IEW (iewInfo[tid].ldstqCount).
+    //
+    // Threads are compared directly instead of being looked up through a
+    // map keyed by their count.  With such a map, threads reporting equal
+    // counts overwrite each other, so a fetchable thread could be skipped
+    // whenever the thread that shadowed it was not able to fetch.
+    ThreadID best_tid = InvalidThreadID;
+    unsigned best_count = 0;
+
+    for (ThreadID tid : *activeThreads) {
+        // Only threads in a status fetch can act on are candidates.
+        if (fetchStatus[tid] != Running &&
+            fetchStatus[tid] != IcacheAccessComplete &&
+            fetchStatus[tid] != Idle) {
+            continue;
+        }
+
+        const unsigned count = fromIEW->iewInfo[tid].ldstqCount;
+
+        // "<=" lets the thread later in the active list win a tie, which
+        // keeps the tie-break of the count-keyed lookup this replaces.
+        if (best_tid == InvalidThreadID || count <= best_count) {
+            best_tid = tid;
+            best_count = count;
+        }
+    }
+
+    // Still InvalidThreadID if no active thread is able to fetch.
+    return best_tid;
+}
+
+ThreadID
+DefaultFetch::branchCount()
+{
+    panic("Branch Count Fetch policy unimplemented\n");
+    return InvalidThreadID;  // NOTE(review): likely unreachable after panic()
+}
+
+void
+DefaultFetch::pipelineIcacheAccesses(ThreadID tid)
+{
+    if (!issuePipelinedIfetch[tid]) {
+        return;
+    }
+    // fetch() sets that flag when the next fetch crosses a buffer boundary.
+    // The next PC to access.
+    TheISA::PCState thisPC = pc[tid];
+    // Microcode ROM micro-ops do not come from the icache.
+    if (isRomMicroPC(thisPC.microPC())) {
+        return;
+    }
+
+    Addr pcOffset = fetchOffset[tid];
+    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
+
+    // Align the fetch PC so its at the start of a fetch buffer segment.
+    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
+
+    // Unless buffer already got the block, fetch it from icache.
+    if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
+        DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
+                "starting at PC %s.\n", tid, thisPC);
+
+        fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
+    }
+}
+
+void
+DefaultFetch::profileStall(ThreadID tid)
+{
+    DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
+    // Attribute this stall to the first matching reason in the chain below.
+    // @todo Per-thread stats
+
+    if (stalls[tid].drain) {
+        ++fetchStats.pendingDrainCycles;
+        DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
+    } else if (activeThreads->empty()) {
+        ++fetchStats.noActiveThreadStallCycles;
+        DPRINTF(Fetch, "Fetch has no active thread!\n");
+    } else if (fetchStatus[tid] == Blocked) {
+        ++fetchStats.blockedCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
+    } else if (fetchStatus[tid] == Squashing) {
+        ++fetchStats.squashCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
+    } else if (fetchStatus[tid] == IcacheWaitResponse) {
+        ++fetchStats.icacheStallCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
+                tid);
+    } else if (fetchStatus[tid] == ItlbWait) {
+        ++fetchStats.tlbCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
+                "finish!\n", tid);
+    } else if (fetchStatus[tid] == TrapPending) {
+        ++fetchStats.pendingTrapStallCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
+                tid);
+    } else if (fetchStatus[tid] == QuiescePending) {
+        ++fetchStats.pendingQuiesceStallCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
+                "instruction!\n", tid);
+    } else if (fetchStatus[tid] == IcacheWaitRetry) {
+        ++fetchStats.icacheWaitRetryStallCycles;
+        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
+                tid);
+    } else if (fetchStatus[tid] == NoGoodAddr) {
+            DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
+                    tid);
+    } else {
+        DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
+            "(Status: %i)\n",
+            tid, fetchStatus[tid]);
+    }
+}
+
+bool
+DefaultFetch::IcachePort::recvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(O3CPU, "Fetch unit received timing\n");
+    // We shouldn't ever get a cacheable block in Modified state
+    assert(pkt->req->isUncacheable() ||
+           !(pkt->cacheResponding() && !pkt->hasSharers()));
+    fetch->processCacheCompletion(pkt);
+    // The response is accepted unconditionally.
+    return true;
+}
+
+void
+DefaultFetch::IcachePort::recvReqRetry()
+{
+    fetch->recvReqRetry();  // forward the port's retry to the fetch stage
+}
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index ff6ffee119..c8fe6d398e 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -46,6 +46,7 @@
 #include "config/the_isa.hh"
 #include "cpu/o3/comm.hh"
 #include "cpu/o3/dyn_inst_ptr.hh"
+#include "cpu/o3/impl.hh"
 #include "cpu/o3/limits.hh"
 #include "cpu/pc_event.hh"
 #include "cpu/pred/bpred_unit.hh"
@@ -69,7 +70,6 @@ class FullO3CPU;
  * It supports the idling functionality of the CPU by indicating to
  * the CPU when it is active and inactive.
  */
-template <class Impl>
 class DefaultFetch
 {
   public:
@@ -80,13 +80,11 @@ class DefaultFetch
     {
       protected:
         /** Pointer to fetch. */
-        DefaultFetch<Impl> *fetch;
+        DefaultFetch *fetch;
 
       public:
         /** Default constructor. */
-        IcachePort(DefaultFetch<Impl> *_fetch, FullO3CPU<Impl>* _cpu)
-            : RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch)
-        { }
+        IcachePort(DefaultFetch *_fetch, FullO3CPU<O3CPUImpl>* _cpu);
 
       protected:
 
@@ -101,16 +99,12 @@ class DefaultFetch
     class FetchTranslation : public BaseTLB::Translation
     {
       protected:
-        DefaultFetch<Impl> *fetch;
+        DefaultFetch *fetch;
 
       public:
-        FetchTranslation(DefaultFetch<Impl> *_fetch)
-            : fetch(_fetch)
-        {}
+        FetchTranslation(DefaultFetch *_fetch) : fetch(_fetch) {}
 
-        void
-        markDelayed()
-        {}
+        void markDelayed() {}
 
         void
         finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc,
@@ -129,33 +123,28 @@ class DefaultFetch
     class FinishTranslationEvent : public Event
     {
       private:
-        DefaultFetch<Impl> *fetch;
+        DefaultFetch *fetch;
         Fault fault;
         RequestPtr req;
 
       public:
-        FinishTranslationEvent(DefaultFetch<Impl> *_fetch)
+        FinishTranslationEvent(DefaultFetch *_fetch)
             : fetch(_fetch), req(nullptr)
         {}
 
-        void setFault(Fault _fault)
-        {
-            fault = _fault;
-        }
-
-        void setReq(const RequestPtr &_req)
-        {
-            req = _req;
-        }
+        void setFault(Fault _fault) { fault = _fault; }
+        void setReq(const RequestPtr &_req) { req = _req; }
 
         /** Process the delayed finish translation */
-        void process()
+        void
+        process()
         {
             assert(fetch->numInst < fetch->fetchWidth);
             fetch->finishTranslation(fault, req);
         }
 
-        const char *description() const
+        const char *
+        description() const
         {
             return "FullO3CPU FetchFinishTranslation";
         }
@@ -208,7 +197,7 @@ class DefaultFetch
 
   public:
     /** DefaultFetch constructor. */
-    DefaultFetch(FullO3CPU<Impl> *_cpu, const DerivO3CPUParams &params);
+    DefaultFetch(FullO3CPU<O3CPUImpl> *_cpu, const DerivO3CPUParams &params);
 
     /** Returns the name of fetch. */
     std::string name() const;
@@ -273,12 +262,12 @@ class DefaultFetch
     /** Changes the status of this stage to active, and indicates this
      * to the CPU.
      */
-    inline void switchToActive();
+    void switchToActive();
 
     /** Changes the status of this stage to inactive, and indicates
      * this to the CPU.
      */
-    inline void switchToInactive();
+    void switchToInactive();
 
     /**
      * Looks up in the branch predictor to see if the next PC should be
@@ -308,15 +297,11 @@ class DefaultFetch
 
     /** Check if an interrupt is pending and that we need to handle
      */
-    bool
-    checkInterrupt(Addr pc)
-    {
-        return interruptPending;
-    }
+    bool checkInterrupt(Addr pc) { return interruptPending; }
 
     /** Squashes a specific thread and resets the PC. */
-    inline void doSquash(const TheISA::PCState &newPC,
-                         const O3DynInstPtr squashInst, ThreadID tid);
+    void doSquash(const TheISA::PCState &newPC,
+            const O3DynInstPtr squashInst, ThreadID tid);
 
     /** Squashes a specific thread and resets the PC. Also tells the CPU to
      * remove any instructions between fetch and decode
@@ -398,26 +383,26 @@ class DefaultFetch
 
   private:
     /** Pointer to the O3CPU. */
-    FullO3CPU<Impl> *cpu;
+    FullO3CPU<O3CPUImpl> *cpu;
 
     /** Time buffer interface. */
     TimeBuffer<O3Comm::TimeStruct> *timeBuffer;
 
     /** Wire to get decode's information from backwards time buffer. */
-    typename TimeBuffer<O3Comm::TimeStruct>::wire fromDecode;
+    TimeBuffer<O3Comm::TimeStruct>::wire fromDecode;
 
     /** Wire to get rename's information from backwards time buffer. */
-    typename TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
+    TimeBuffer<O3Comm::TimeStruct>::wire fromRename;
 
     /** Wire to get iew's information from backwards time buffer. */
-    typename TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
+    TimeBuffer<O3Comm::TimeStruct>::wire fromIEW;
 
     /** Wire to get commit's information from backwards time buffer. */
-    typename TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
+    TimeBuffer<O3Comm::TimeStruct>::wire fromCommit;
 
     //Might be annoying how this name is different than the queue.
     /** Wire used to write any information heading to decode. */
-    typename TimeBuffer<O3Comm::FetchStruct>::wire toDecode;
+    TimeBuffer<O3Comm::FetchStruct>::wire toDecode;
 
     /** BPredUnit. */
     BPredUnit *branchPred;
@@ -540,7 +525,7 @@ class DefaultFetch
   protected:
     struct FetchStatGroup : public Stats::Group
     {
-        FetchStatGroup(FullO3CPU<Impl> *cpu, DefaultFetch *fetch);
+        FetchStatGroup(FullO3CPU<O3CPUImpl> *cpu, DefaultFetch *fetch);
         // @todo: Consider making these
         // vectors and tracking on a per thread basis.
         /** Stat for total number of cycles stalled due to an icache miss. */
diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh
deleted file mode 100644
index 72b10c5609..0000000000
--- a/src/cpu/o3/fetch_impl.hh
+++ /dev/null
@@ -1,1664 +0,0 @@
-/*
- * Copyright (c) 2010-2014 ARM Limited
- * Copyright (c) 2012-2013 AMD
- * All rights reserved.
- *
- * The license below extends only to copyright in the software and shall
- * not be construed as granting a license to any other intellectual
- * property including but not limited to intellectual property relating
- * to a hardware implementation of the functionality of the software
- * licensed hereunder.  You may use the software subject to the license
- * terms below provided that you ensure that this notice is replicated
- * unmodified and in its entirety in all distributions of the software,
- * modified or unmodified, in source code or in binary form.
- *
- * Copyright (c) 2004-2006 The Regents of The University of Michigan
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __CPU_O3_FETCH_IMPL_HH__
-#define __CPU_O3_FETCH_IMPL_HH__
-
-#include <algorithm>
-#include <cstring>
-#include <list>
-#include <map>
-#include <queue>
-
-#include "arch/generic/tlb.hh"
-#include "base/random.hh"
-#include "base/types.hh"
-#include "config/the_isa.hh"
-#include "cpu/base.hh"
-#include "cpu/exetrace.hh"
-#include "cpu/nop_static_inst.hh"
-#include "cpu/o3/cpu.hh"
-#include "cpu/o3/fetch.hh"
-#include "cpu/o3/isa_specific.hh"
-#include "cpu/o3/limits.hh"
-#include "debug/Activity.hh"
-#include "debug/Drain.hh"
-#include "debug/Fetch.hh"
-#include "debug/O3CPU.hh"
-#include "debug/O3PipeView.hh"
-#include "mem/packet.hh"
-#include "params/DerivO3CPU.hh"
-#include "sim/byteswap.hh"
-#include "sim/core.hh"
-#include "sim/eventq.hh"
-#include "sim/full_system.hh"
-#include "sim/system.hh"
-
-template<class Impl>
-DefaultFetch<Impl>::DefaultFetch(FullO3CPU<Impl> *_cpu,
-        const DerivO3CPUParams &params)
-    : fetchPolicy(params.smtFetchPolicy),
-      cpu(_cpu),
-      branchPred(nullptr),
-      decodeToFetchDelay(params.decodeToFetchDelay),
-      renameToFetchDelay(params.renameToFetchDelay),
-      iewToFetchDelay(params.iewToFetchDelay),
-      commitToFetchDelay(params.commitToFetchDelay),
-      fetchWidth(params.fetchWidth),
-      decodeWidth(params.decodeWidth),
-      retryPkt(NULL),
-      retryTid(InvalidThreadID),
-      cacheBlkSize(cpu->cacheLineSize()),
-      fetchBufferSize(params.fetchBufferSize),
-      fetchBufferMask(fetchBufferSize - 1),
-      fetchQueueSize(params.fetchQueueSize),
-      numThreads(params.numThreads),
-      numFetchingThreads(params.smtNumFetchingThreads),
-      icachePort(this, _cpu),
-      finishTranslationEvent(this), fetchStats(_cpu, this)
-{
-    if (numThreads > O3MaxThreads)
-        fatal("numThreads (%d) is larger than compiled limit (%d),\n"
-              "\tincrease O3MaxThreads in src/cpu/o3/limits.hh\n",
-              numThreads, static_cast<int>(O3MaxThreads));
-    if (fetchWidth > O3MaxWidth)
-        fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"
-             "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n",
-             fetchWidth, static_cast<int>(O3MaxWidth));
-    if (fetchBufferSize > cacheBlkSize)
-        fatal("fetch buffer size (%u bytes) is greater than the cache "
-              "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);
-    if (cacheBlkSize % fetchBufferSize)
-        fatal("cache block (%u bytes) is not a multiple of the "
-              "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);
-
-    // Get the size of an instruction.
-    instSize = sizeof(TheISA::MachInst);
-
-    for (int i = 0; i < O3MaxThreads; i++) {
-        fetchStatus[i] = Idle;
-        decoder[i] = nullptr;
-        pc[i] = 0;
-        fetchOffset[i] = 0;
-        macroop[i] = nullptr;
-        delayedCommit[i] = false;
-        memReq[i] = nullptr;
-        stalls[i] = {false, false};
-        fetchBuffer[i] = NULL;
-        fetchBufferPC[i] = 0;
-        fetchBufferValid[i] = false;
-        lastIcacheStall[i] = 0;
-        issuePipelinedIfetch[i] = false;
-    }
-
-    branchPred = params.branchPred;
-
-    for (ThreadID tid = 0; tid < numThreads; tid++) {
-        decoder[tid] = new TheISA::Decoder(
-                dynamic_cast<TheISA::ISA *>(params.isa[tid]));
-        // Create space to buffer the cache line data,
-        // which may not hold the entire cache line.
-        fetchBuffer[tid] = new uint8_t[fetchBufferSize];
-    }
-}
-
-template <class Impl>
-std::string
-DefaultFetch<Impl>::name() const
-{
-    return cpu->name() + ".fetch";
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::regProbePoints()
-{
-    ppFetch = new ProbePointArg<O3DynInstPtr>(cpu->getProbeManager(), "Fetch");
-    ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),
-                                                       "FetchRequest");
-
-}
-
-template <class Impl>
-DefaultFetch<Impl>::
-FetchStatGroup::FetchStatGroup(FullO3CPU<Impl> *cpu, DefaultFetch *fetch)
-    : Stats::Group(cpu, "fetch"),
-    ADD_STAT(icacheStallCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch is stalled on an Icache miss"),
-    ADD_STAT(insts, Stats::Units::Count::get(),
-             "Number of instructions fetch has processed"),
-    ADD_STAT(branches, Stats::Units::Count::get(),
-             "Number of branches that fetch encountered"),
-    ADD_STAT(predictedBranches, Stats::Units::Count::get(),
-             "Number of branches that fetch has predicted taken"),
-    ADD_STAT(cycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has run and was not squashing or "
-             "blocked"),
-    ADD_STAT(squashCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has spent squashing"),
-    ADD_STAT(tlbCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has spent waiting for tlb"),
-    ADD_STAT(idleCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch was idle"),
-    ADD_STAT(blockedCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has spent blocked"),
-    ADD_STAT(miscStallCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has spent waiting on interrupts, or bad "
-             "addresses, or out of MSHRs"),
-    ADD_STAT(pendingDrainCycles, Stats::Units::Cycle::get(),
-             "Number of cycles fetch has spent waiting on pipes to drain"),
-    ADD_STAT(noActiveThreadStallCycles, Stats::Units::Cycle::get(),
-             "Number of stall cycles due to no active thread to fetch from"),
-    ADD_STAT(pendingTrapStallCycles, Stats::Units::Cycle::get(),
-             "Number of stall cycles due to pending traps"),
-    ADD_STAT(pendingQuiesceStallCycles, Stats::Units::Cycle::get(),
-             "Number of stall cycles due to pending quiesce instructions"),
-    ADD_STAT(icacheWaitRetryStallCycles, Stats::Units::Cycle::get(),
-             "Number of stall cycles due to full MSHR"),
-    ADD_STAT(cacheLines, Stats::Units::Count::get(),
-             "Number of cache lines fetched"),
-    ADD_STAT(icacheSquashes, Stats::Units::Count::get(),
-             "Number of outstanding Icache misses that were squashed"),
-    ADD_STAT(tlbSquashes, Stats::Units::Count::get(),
-             "Number of outstanding ITLB misses that were squashed"),
-    ADD_STAT(nisnDist, Stats::Units::Count::get(),
-             "Number of instructions fetched each cycle (Total)"),
-    ADD_STAT(idleRate, Stats::Units::Ratio::get(),
-             "Ratio of cycles fetch was idle",
-             idleCycles / cpu->baseStats.numCycles),
-    ADD_STAT(branchRate, Stats::Units::Ratio::get(),
-             "Number of branch fetches per cycle",
-             branches / cpu->baseStats.numCycles),
-    ADD_STAT(rate, Stats::Units::Rate<
-                    Stats::Units::Count, Stats::Units::Cycle>::get(),
-             "Number of inst fetches per cycle",
-             insts / cpu->baseStats.numCycles)
-{
-        icacheStallCycles
-            .prereq(icacheStallCycles);
-        insts
-            .prereq(insts);
-        branches
-            .prereq(branches);
-        predictedBranches
-            .prereq(predictedBranches);
-        cycles
-            .prereq(cycles);
-        squashCycles
-            .prereq(squashCycles);
-        tlbCycles
-            .prereq(tlbCycles);
-        idleCycles
-            .prereq(idleCycles);
-        blockedCycles
-            .prereq(blockedCycles);
-        cacheLines
-            .prereq(cacheLines);
-        miscStallCycles
-            .prereq(miscStallCycles);
-        pendingDrainCycles
-            .prereq(pendingDrainCycles);
-        noActiveThreadStallCycles
-            .prereq(noActiveThreadStallCycles);
-        pendingTrapStallCycles
-            .prereq(pendingTrapStallCycles);
-        pendingQuiesceStallCycles
-            .prereq(pendingQuiesceStallCycles);
-        icacheWaitRetryStallCycles
-            .prereq(icacheWaitRetryStallCycles);
-        icacheSquashes
-            .prereq(icacheSquashes);
-        tlbSquashes
-            .prereq(tlbSquashes);
-        nisnDist
-            .init(/* base value */ 0,
-              /* last value */ fetch->fetchWidth,
-              /* bucket size */ 1)
-            .flags(Stats::pdf);
-        idleRate
-            .prereq(idleRate);
-        branchRate
-            .flags(Stats::total);
-        rate
-            .flags(Stats::total);
-}
-template<class Impl>
-void
-DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<O3Comm::TimeStruct> *time_buffer)
-{
-    timeBuffer = time_buffer;
-
-    // Create wires to get information from proper places in time buffer.
-    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
-    fromRename = timeBuffer->getWire(-renameToFetchDelay);
-    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
-    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
-{
-    activeThreads = at_ptr;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::setFetchQueue(TimeBuffer<O3Comm::FetchStruct> *ftb_ptr)
-{
-    // Create wire to write information to proper place in fetch time buf.
-    toDecode = ftb_ptr->getWire(0);
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::startupStage()
-{
-    assert(priorityList.empty());
-    resetStage();
-
-    // Fetch needs to start fetching instructions at the very beginning,
-    // so it must start up in active state.
-    switchToActive();
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::clearStates(ThreadID tid)
-{
-    fetchStatus[tid] = Running;
-    pc[tid] = cpu->pcState(tid);
-    fetchOffset[tid] = 0;
-    macroop[tid] = NULL;
-    delayedCommit[tid] = false;
-    memReq[tid] = NULL;
-    stalls[tid].decode = false;
-    stalls[tid].drain = false;
-    fetchBufferPC[tid] = 0;
-    fetchBufferValid[tid] = false;
-    fetchQueue[tid].clear();
-
-    // TODO not sure what to do with priorityList for now
-    // priorityList.push_back(tid);
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::resetStage()
-{
-    numInst = 0;
-    interruptPending = false;
-    cacheBlocked = false;
-
-    priorityList.clear();
-
-    // Setup PC and nextPC with initial state.
-    for (ThreadID tid = 0; tid < numThreads; ++tid) {
-        fetchStatus[tid] = Running;
-        pc[tid] = cpu->pcState(tid);
-        fetchOffset[tid] = 0;
-        macroop[tid] = NULL;
-
-        delayedCommit[tid] = false;
-        memReq[tid] = NULL;
-
-        stalls[tid].decode = false;
-        stalls[tid].drain = false;
-
-        fetchBufferPC[tid] = 0;
-        fetchBufferValid[tid] = false;
-
-        fetchQueue[tid].clear();
-
-        priorityList.push_back(tid);
-    }
-
-    wroteToTimeBuffer = false;
-    _status = Inactive;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
-{
-    ThreadID tid = cpu->contextToThread(pkt->req->contextId());
-
-    DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid);
-    assert(!cpu->switchedOut());
-
-    // Only change the status if it's still waiting on the icache access
-    // to return.
-    if (fetchStatus[tid] != IcacheWaitResponse ||
-        pkt->req != memReq[tid]) {
-        ++fetchStats.icacheSquashes;
-        delete pkt;
-        return;
-    }
-
-    memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);
-    fetchBufferValid[tid] = true;
-
-    // Wake up the CPU (if it went to sleep and was waiting on
-    // this completion event).
-    cpu->wakeCPU();
-
-    DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n",
-            tid);
-
-    switchToActive();
-
-    // Only switch to IcacheAccessComplete if we're not stalled as well.
-    if (checkStall(tid)) {
-        fetchStatus[tid] = Blocked;
-    } else {
-        fetchStatus[tid] = IcacheAccessComplete;
-    }
-
-    pkt->req->setAccessLatency();
-    cpu->ppInstAccessComplete->notify(pkt);
-    // Reset the mem req to NULL.
-    delete pkt;
-    memReq[tid] = NULL;
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::drainResume()
-{
-    for (ThreadID i = 0; i < numThreads; ++i) {
-        stalls[i].decode = false;
-        stalls[i].drain = false;
-    }
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::drainSanityCheck() const
-{
-    assert(isDrained());
-    assert(retryPkt == NULL);
-    assert(retryTid == InvalidThreadID);
-    assert(!cacheBlocked);
-    assert(!interruptPending);
-
-    for (ThreadID i = 0; i < numThreads; ++i) {
-        assert(!memReq[i]);
-        assert(fetchStatus[i] == Idle || stalls[i].drain);
-    }
-
-    branchPred->drainSanityCheck();
-}
-
-template <class Impl>
-bool
-DefaultFetch<Impl>::isDrained() const
-{
-    /* Make sure that threads are either idle of that the commit stage
-     * has signaled that draining has completed by setting the drain
-     * stall flag. This effectively forces the pipeline to be disabled
-     * until the whole system is drained (simulation may continue to
-     * drain other components).
-     */
-    for (ThreadID i = 0; i < numThreads; ++i) {
-        // Verify fetch queues are drained
-        if (!fetchQueue[i].empty())
-            return false;
-
-        // Return false if not idle or drain stalled
-        if (fetchStatus[i] != Idle) {
-            if (fetchStatus[i] == Blocked && stalls[i].drain)
-                continue;
-            else
-                return false;
-        }
-    }
-
-    /* The pipeline might start up again in the middle of the drain
-     * cycle if the finish translation event is scheduled, so make
-     * sure that's not the case.
-     */
-    return !finishTranslationEvent.scheduled();
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::takeOverFrom()
-{
-    assert(cpu->getInstPort().isConnected());
-    resetStage();
-
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::drainStall(ThreadID tid)
-{
-    assert(cpu->isDraining());
-    assert(!stalls[tid].drain);
-    DPRINTF(Drain, "%i: Thread drained.\n", tid);
-    stalls[tid].drain = true;
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::wakeFromQuiesce()
-{
-    DPRINTF(Fetch, "Waking up from quiesce\n");
-    // Hopefully this is safe
-    // @todo: Allow other threads to wake from quiesce.
-    fetchStatus[0] = Running;
-}
-
-template <class Impl>
-inline void
-DefaultFetch<Impl>::switchToActive()
-{
-    if (_status == Inactive) {
-        DPRINTF(Activity, "Activating stage.\n");
-
-        cpu->activateStage(FullO3CPU<Impl>::FetchIdx);
-
-        _status = Active;
-    }
-}
-
-template <class Impl>
-inline void
-DefaultFetch<Impl>::switchToInactive()
-{
-    if (_status == Active) {
-        DPRINTF(Activity, "Deactivating stage.\n");
-
-        cpu->deactivateStage(FullO3CPU<Impl>::FetchIdx);
-
-        _status = Inactive;
-    }
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::deactivateThread(ThreadID tid)
-{
-    // Update priority list
-    auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);
-    if (thread_it != priorityList.end()) {
-        priorityList.erase(thread_it);
-    }
-}
-
-template <class Impl>
-bool
-DefaultFetch<Impl>::lookupAndUpdateNextPC(
-        const O3DynInstPtr &inst, TheISA::PCState &nextPC)
-{
-    // Do branch prediction check here.
-    // A bit of a misnomer...next_PC is actually the current PC until
-    // this function updates it.
-    bool predict_taken;
-
-    if (!inst->isControl()) {
-        inst->staticInst->advancePC(nextPC);
-        inst->setPredTarg(nextPC);
-        inst->setPredTaken(false);
-        return false;
-    }
-
-    ThreadID tid = inst->threadNumber;
-    predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,
-                                        nextPC, tid);
-
-    if (predict_taken) {
-        DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
-                "predicted to be taken to %s\n",
-                tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
-    } else {
-        DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
-                "predicted to be not taken\n",
-                tid, inst->seqNum, inst->pcState().instAddr());
-    }
-
-    DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x "
-            "predicted to go to %s\n",
-            tid, inst->seqNum, inst->pcState().instAddr(), nextPC);
-    inst->setPredTarg(nextPC);
-    inst->setPredTaken(predict_taken);
-
-    ++fetchStats.branches;
-
-    if (predict_taken) {
-        ++fetchStats.predictedBranches;
-    }
-
-    return predict_taken;
-}
-
-template <class Impl>
-bool
-DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)
-{
-    Fault fault = NoFault;
-
-    assert(!cpu->switchedOut());
-
-    // @todo: not sure if these should block translation.
-    //AlphaDep
-    if (cacheBlocked) {
-        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",
-                tid);
-        return false;
-    } else if (checkInterrupt(pc) && !delayedCommit[tid]) {
-        // Hold off fetch from getting new instructions when:
-        // Cache is blocked, or
-        // while an interrupt is pending and we're not in PAL mode, or
-        // fetch is switched out.
-        DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",
-                tid);
-        return false;
-    }
-
-    // Align the fetch address to the start of a fetch buffer segment.
-    Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr);
-
-    DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",
-            tid, fetchBufferBlockPC, vaddr);
-
-    // Setup the memReq to do a read of the first instruction's address.
-    // Set the appropriate read size and flags as well.
-    // Build request here.
-    RequestPtr mem_req = std::make_shared<Request>(
-        fetchBufferBlockPC, fetchBufferSize,
-        Request::INST_FETCH, cpu->instRequestorId(), pc,
-        cpu->thread[tid]->contextId());
-
-    mem_req->taskId(cpu->taskId());
-
-    memReq[tid] = mem_req;
-
-    // Initiate translation of the icache block
-    fetchStatus[tid] = ItlbWait;
-    FetchTranslation *trans = new FetchTranslation(this);
-    cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(),
-                              trans, BaseTLB::Execute);
-    return true;
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::finishTranslation(const Fault &fault,
-                                      const RequestPtr &mem_req)
-{
-    ThreadID tid = cpu->contextToThread(mem_req->contextId());
-    Addr fetchBufferBlockPC = mem_req->getVaddr();
-
-    assert(!cpu->switchedOut());
-
-    // Wake up CPU if it was idle
-    cpu->wakeCPU();
-
-    if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||
-        mem_req->getVaddr() != memReq[tid]->getVaddr()) {
-        DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",
-                tid);
-        ++fetchStats.tlbSquashes;
-        return;
-    }
-
-
-    // If translation was successful, attempt to read the icache block.
-    if (fault == NoFault) {
-        // Check that we're not going off into random memory
-        // If we have, just wait around for commit to squash something and put
-        // us on the right track
-        if (!cpu->system->isMemAddr(mem_req->getPaddr())) {
-            warn("Address %#x is outside of physical memory, stopping fetch\n",
-                    mem_req->getPaddr());
-            fetchStatus[tid] = NoGoodAddr;
-            memReq[tid] = NULL;
-            return;
-        }
-
-        // Build packet here.
-        PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);
-        data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);
-
-        fetchBufferPC[tid] = fetchBufferBlockPC;
-        fetchBufferValid[tid] = false;
-        DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
-
-        fetchStats.cacheLines++;
-
-        // Access the cache.
-        if (!icachePort.sendTimingReq(data_pkt)) {
-            assert(retryPkt == NULL);
-            assert(retryTid == InvalidThreadID);
-            DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);
-
-            fetchStatus[tid] = IcacheWaitRetry;
-            retryPkt = data_pkt;
-            retryTid = tid;
-            cacheBlocked = true;
-        } else {
-            DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid);
-            DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache "
-                    "response.\n", tid);
-            lastIcacheStall[tid] = curTick();
-            fetchStatus[tid] = IcacheWaitResponse;
-            // Notify Fetch Request probe when a packet containing a fetch
-            // request is successfully sent
-            ppFetchRequestSent->notify(mem_req);
-        }
-    } else {
-        // Don't send an instruction to decode if we can't handle it.
-        if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {
-            assert(!finishTranslationEvent.scheduled());
-            finishTranslationEvent.setFault(fault);
-            finishTranslationEvent.setReq(mem_req);
-            cpu->schedule(finishTranslationEvent,
-                          cpu->clockEdge(Cycles(1)));
-            return;
-        }
-        DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",
-                tid, mem_req->getVaddr(), memReq[tid]->getVaddr());
-        // Translation faulted, icache request won't be sent.
-        memReq[tid] = NULL;
-
-        // Send the fault to commit.  This thread will not do anything
-        // until commit handles the fault.  The only other way it can
-        // wake up is if a squash comes along and changes the PC.
-        TheISA::PCState fetchPC = pc[tid];
-
-        DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid);
-        // We will use a nop in ordier to carry the fault.
-        O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr,
-                fetchPC, fetchPC, false);
-        instruction->setNotAnInst();
-
-        instruction->setPredTarg(fetchPC);
-        instruction->fault = fault;
-        wroteToTimeBuffer = true;
-
-        DPRINTF(Activity, "Activity this cycle.\n");
-        cpu->activityThisCycle();
-
-        fetchStatus[tid] = TrapPending;
-
-        DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid);
-        DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n",
-                tid, fault->name(), pc[tid]);
-    }
-    _status = updateFetchStatus();
-}
-
-template <class Impl>
-inline void
-DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,
-                             const O3DynInstPtr squashInst, ThreadID tid)
-{
-    DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n",
-            tid, newPC);
-
-    pc[tid] = newPC;
-    fetchOffset[tid] = 0;
-    if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())
-        macroop[tid] = squashInst->macroop;
-    else
-        macroop[tid] = NULL;
-    decoder[tid]->reset();
-
-    // Clear the icache miss if it's outstanding.
-    if (fetchStatus[tid] == IcacheWaitResponse) {
-        DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n",
-                tid);
-        memReq[tid] = NULL;
-    } else if (fetchStatus[tid] == ItlbWait) {
-        DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n",
-                tid);
-        memReq[tid] = NULL;
-    }
-
-    // Get rid of the retrying packet if it was from this thread.
-    if (retryTid == tid) {
-        assert(cacheBlocked);
-        if (retryPkt) {
-            delete retryPkt;
-        }
-        retryPkt = NULL;
-        retryTid = InvalidThreadID;
-    }
-
-    fetchStatus[tid] = Squashing;
-
-    // Empty fetch queue
-    fetchQueue[tid].clear();
-
-    // microops are being squashed, it is not known wheather the
-    // youngest non-squashed microop was  marked delayed commit
-    // or not. Setting the flag to true ensures that the
-    // interrupts are not handled when they cannot be, though
-    // some opportunities to handle interrupts may be missed.
-    delayedCommit[tid] = true;
-
-    ++fetchStats.squashCycles;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,
-                                     const O3DynInstPtr squashInst,
-                                     const InstSeqNum seq_num, ThreadID tid)
-{
-    DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid);
-
-    doSquash(newPC, squashInst, tid);
-
-    // Tell the CPU to remove any instructions that are in flight between
-    // fetch and decode.
-    cpu->removeInstsUntil(seq_num, tid);
-}
-
-template<class Impl>
-bool
-DefaultFetch<Impl>::checkStall(ThreadID tid) const
-{
-    bool ret_val = false;
-
-    if (stalls[tid].drain) {
-        assert(cpu->isDraining());
-        DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid);
-        ret_val = true;
-    }
-
-    return ret_val;
-}
-
-template<class Impl>
-typename DefaultFetch<Impl>::FetchStatus
-DefaultFetch<Impl>::updateFetchStatus()
-{
-    //Check Running
-    std::list<ThreadID>::iterator threads = activeThreads->begin();
-    std::list<ThreadID>::iterator end = activeThreads->end();
-
-    while (threads != end) {
-        ThreadID tid = *threads++;
-
-        if (fetchStatus[tid] == Running ||
-            fetchStatus[tid] == Squashing ||
-            fetchStatus[tid] == IcacheAccessComplete) {
-
-            if (_status == Inactive) {
-                DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid);
-
-                if (fetchStatus[tid] == IcacheAccessComplete) {
-                    DPRINTF(Activity, "[tid:%i] Activating fetch due to cache"
-                            "completion\n",tid);
-                }
-
-                cpu->activateStage(FullO3CPU<Impl>::FetchIdx);
-            }
-
-            return Active;
-        }
-    }
-
-    // Stage is switching from active to inactive, notify CPU of it.
-    if (_status == Active) {
-        DPRINTF(Activity, "Deactivating stage.\n");
-
-        cpu->deactivateStage(FullO3CPU<Impl>::FetchIdx);
-    }
-
-    return Inactive;
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,
-                           const InstSeqNum seq_num, O3DynInstPtr squashInst,
-                           ThreadID tid)
-{
-    DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid);
-
-    doSquash(newPC, squashInst, tid);
-
-    // Tell the CPU to remove any instructions that are not in the ROB.
-    cpu->removeInstsNotInROB(tid);
-}
-
-template <class Impl>
-void
-DefaultFetch<Impl>::tick()
-{
-    std::list<ThreadID>::iterator threads = activeThreads->begin();
-    std::list<ThreadID>::iterator end = activeThreads->end();
-    bool status_change = false;
-
-    wroteToTimeBuffer = false;
-
-    for (ThreadID i = 0; i < numThreads; ++i) {
-        issuePipelinedIfetch[i] = false;
-    }
-
-    while (threads != end) {
-        ThreadID tid = *threads++;
-
-        // Check the signals for each thread to determine the proper status
-        // for each thread.
-        bool updated_status = checkSignalsAndUpdate(tid);
-        status_change =  status_change || updated_status;
-    }
-
-    DPRINTF(Fetch, "Running stage.\n");
-
-    if (FullSystem) {
-        if (fromCommit->commitInfo[0].interruptPending) {
-            interruptPending = true;
-        }
-
-        if (fromCommit->commitInfo[0].clearInterrupt) {
-            interruptPending = false;
-        }
-    }
-
-    for (threadFetched = 0; threadFetched < numFetchingThreads;
-         threadFetched++) {
-        // Fetch each of the actively fetching threads.
-        fetch(status_change);
-    }
-
-    // Record number of instructions fetched this cycle for distribution.
-    fetchStats.nisnDist.sample(numInst);
-
-    if (status_change) {
-        // Change the fetch stage status if there was a status change.
-        _status = updateFetchStatus();
-    }
-
-    // Issue the next I-cache request if possible.
-    for (ThreadID i = 0; i < numThreads; ++i) {
-        if (issuePipelinedIfetch[i]) {
-            pipelineIcacheAccesses(i);
-        }
-    }
-
-    // Send instructions enqueued into the fetch queue to decode.
-    // Limit rate by fetchWidth.  Stall if decode is stalled.
-    unsigned insts_to_decode = 0;
-    unsigned available_insts = 0;
-
-    for (auto tid : *activeThreads) {
-        if (!stalls[tid].decode) {
-            available_insts += fetchQueue[tid].size();
-        }
-    }
-
-    // Pick a random thread to start trying to grab instructions from
-    auto tid_itr = activeThreads->begin();
-    std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));
-
-    while (available_insts != 0 && insts_to_decode < decodeWidth) {
-        ThreadID tid = *tid_itr;
-        if (!stalls[tid].decode && !fetchQueue[tid].empty()) {
-            const auto& inst = fetchQueue[tid].front();
-            toDecode->insts[toDecode->size++] = inst;
-            DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode "
-                    "from fetch queue. Fetch queue size: %i.\n",
-                    tid, inst->seqNum, fetchQueue[tid].size());
-
-            wroteToTimeBuffer = true;
-            fetchQueue[tid].pop_front();
-            insts_to_decode++;
-            available_insts--;
-        }
-
-        tid_itr++;
-        // Wrap around if at end of active threads list
-        if (tid_itr == activeThreads->end())
-            tid_itr = activeThreads->begin();
-    }
-
-    // If there was activity this cycle, inform the CPU of it.
-    if (wroteToTimeBuffer) {
-        DPRINTF(Activity, "Activity this cycle.\n");
-        cpu->activityThisCycle();
-    }
-
-    // Reset the number of the instruction we've fetched.
-    numInst = 0;
-}
-
-template <class Impl>
-bool
-DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
-{
-    // Update the per thread stall statuses.
-    if (fromDecode->decodeBlock[tid]) {
-        stalls[tid].decode = true;
-    }
-
-    if (fromDecode->decodeUnblock[tid]) {
-        assert(stalls[tid].decode);
-        assert(!fromDecode->decodeBlock[tid]);
-        stalls[tid].decode = false;
-    }
-
-    // Check squash signals from commit.
-    if (fromCommit->commitInfo[tid].squash) {
-
-        DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
-                "from commit.\n",tid);
-        // In any case, squash.
-        squash(fromCommit->commitInfo[tid].pc,
-               fromCommit->commitInfo[tid].doneSeqNum,
-               fromCommit->commitInfo[tid].squashInst, tid);
-
-        // If it was a branch mispredict on a control instruction, update the
-        // branch predictor with that instruction, otherwise just kill the
-        // invalid state we generated in after sequence number
-        if (fromCommit->commitInfo[tid].mispredictInst &&
-            fromCommit->commitInfo[tid].mispredictInst->isControl()) {
-            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
-                              fromCommit->commitInfo[tid].pc,
-                              fromCommit->commitInfo[tid].branchTaken,
-                              tid);
-        } else {
-            branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,
-                              tid);
-        }
-
-        return true;
-    } else if (fromCommit->commitInfo[tid].doneSeqNum) {
-        // Update the branch predictor if it wasn't a squashed instruction
-        // that was broadcasted.
-        branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);
-    }
-
-    // Check squash signals from decode.
-    if (fromDecode->decodeInfo[tid].squash) {
-        DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash "
-                "from decode.\n",tid);
-
-        // Update the branch predictor.
-        if (fromDecode->decodeInfo[tid].branchMispredict) {
-            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
-                              fromDecode->decodeInfo[tid].nextPC,
-                              fromDecode->decodeInfo[tid].branchTaken,
-                              tid);
-        } else {
-            branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,
-                              tid);
-        }
-
-        if (fetchStatus[tid] != Squashing) {
-
-            DPRINTF(Fetch, "Squashing from decode with PC = %s\n",
-                fromDecode->decodeInfo[tid].nextPC);
-            // Squash unless we're already squashing
-            squashFromDecode(fromDecode->decodeInfo[tid].nextPC,
-                             fromDecode->decodeInfo[tid].squashInst,
-                             fromDecode->decodeInfo[tid].doneSeqNum,
-                             tid);
-
-            return true;
-        }
-    }
-
-    if (checkStall(tid) &&
-        fetchStatus[tid] != IcacheWaitResponse &&
-        fetchStatus[tid] != IcacheWaitRetry &&
-        fetchStatus[tid] != ItlbWait &&
-        fetchStatus[tid] != QuiescePending) {
-        DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid);
-
-        fetchStatus[tid] = Blocked;
-
-        return true;
-    }
-
-    if (fetchStatus[tid] == Blocked ||
-        fetchStatus[tid] == Squashing) {
-        // Switch status to running if fetch isn't being told to block or
-        // squash this cycle.
-        DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n",
-                tid);
-
-        fetchStatus[tid] = Running;
-
-        return true;
-    }
-
-    // If we've reached this point, we have not gotten any signals that
-    // cause fetch to change its status.  Fetch remains the same as before.
-    return false;
-}
-
-template<class Impl>
-O3DynInstPtr
-DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,
-                              StaticInstPtr curMacroop, TheISA::PCState thisPC,
-                              TheISA::PCState nextPC, bool trace)
-{
-    // Get a sequence number.
-    InstSeqNum seq = cpu->getAndIncrementInstSeq();
-
-    // Create a new DynInst from the instruction fetched.
-    O3DynInstPtr instruction =
-        new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);
-    instruction->setTid(tid);
-
-    instruction->setThreadState(cpu->thread[tid]);
-
-    DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created "
-            "[sn:%lli].\n", tid, thisPC.instAddr(),
-            thisPC.microPC(), seq);
-
-    DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid,
-            instruction->staticInst->
-            disassemble(thisPC.instAddr()));
-
-#if TRACING_ON
-    if (trace) {
-        instruction->traceData =
-            cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),
-                    instruction->staticInst, thisPC, curMacroop);
-    }
-#else
-    instruction->traceData = NULL;
-#endif
-
-    // Add instruction to the CPU's list of instructions.
-    instruction->setInstListIt(cpu->addInst(instruction));
-
-    // Write the instruction to the first slot in the queue
-    // that heads to decode.
-    assert(numInst < fetchWidth);
-    fetchQueue[tid].push_back(instruction);
-    assert(fetchQueue[tid].size() <= fetchQueueSize);
-    DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n",
-            tid, fetchQueue[tid].size(), fetchQueueSize);
-    //toDecode->insts[toDecode->size++] = instruction;
-
-    // Keep track of if we can take an interrupt at this boundary
-    delayedCommit[tid] = instruction->isDelayedCommit();
-
-    return instruction;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::fetch(bool &status_change)
-{
-    //////////////////////////////////////////
-    // Start actual fetch
-    //////////////////////////////////////////
-    ThreadID tid = getFetchingThread();
-
-    assert(!cpu->switchedOut());
-
-    if (tid == InvalidThreadID) {
-        // Breaks looping condition in tick()
-        threadFetched = numFetchingThreads;
-
-        if (numThreads == 1) {  // @todo Per-thread stats
-            profileStall(0);
-        }
-
-        return;
-    }
-
-    DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);
-
-    // The current PC.
-    TheISA::PCState thisPC = pc[tid];
-
-    Addr pcOffset = fetchOffset[tid];
-    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
-
-    bool inRom = isRomMicroPC(thisPC.microPC());
-
-    // If returning from the delay of a cache miss, then update the status
-    // to running, otherwise do the cache access.  Possibly move this up
-    // to tick() function.
-    if (fetchStatus[tid] == IcacheAccessComplete) {
-        DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid);
-
-        fetchStatus[tid] = Running;
-        status_change = true;
-    } else if (fetchStatus[tid] == Running) {
-        // Align the fetch PC so its at the start of a fetch buffer segment.
-        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
-
-        // If buffer is no longer valid or fetchAddr has moved to point
-        // to the next cache block, AND we have no remaining ucode
-        // from a macro-op, then start fetch from icache.
-        if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])
-            && !inRom && !macroop[tid]) {
-            DPRINTF(Fetch, "[tid:%i] Attempting to translate and read "
-                    "instruction, starting at PC %s.\n", tid, thisPC);
-
-            fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
-
-            if (fetchStatus[tid] == IcacheWaitResponse)
-                ++fetchStats.icacheStallCycles;
-            else if (fetchStatus[tid] == ItlbWait)
-                ++fetchStats.tlbCycles;
-            else
-                ++fetchStats.miscStallCycles;
-            return;
-        } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {
-            // Stall CPU if an interrupt is posted and we're not issuing
-            // an delayed commit micro-op currently (delayed commit instructions
-            // are not interruptable by interrupts, only faults)
-            ++fetchStats.miscStallCycles;
-            DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid);
-            return;
-        }
-    } else {
-        if (fetchStatus[tid] == Idle) {
-            ++fetchStats.idleCycles;
-            DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid);
-        }
-
-        // Status is Idle, so fetch should do nothing.
-        return;
-    }
-
-    ++fetchStats.cycles;
-
-    TheISA::PCState nextPC = thisPC;
-
-    StaticInstPtr staticInst = NULL;
-    StaticInstPtr curMacroop = macroop[tid];
-
-    // If the read of the first instruction was successful, then grab the
-    // instructions from the rest of the cache line and put them into the
-    // queue heading to decode.
-
-    DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to "
-            "decode.\n", tid);
-
-    // Need to keep track of whether or not a predicted branch
-    // ended this fetch block.
-    bool predictedBranch = false;
-
-    // Need to halt fetch if quiesce instruction detected
-    bool quiesce = false;
-
-    TheISA::MachInst *cacheInsts =
-        reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);
-
-    const unsigned numInsts = fetchBufferSize / instSize;
-    unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
-
-    // Loop through instruction memory from the cache.
-    // Keep issuing while fetchWidth is available and branch is not
-    // predicted taken
-    while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
-           && !predictedBranch && !quiesce) {
-        // We need to process more memory if we aren't going to get a
-        // StaticInst from the rom, the current macroop, or what's already
-        // in the decoder.
-        bool needMem = !inRom && !curMacroop &&
-            !decoder[tid]->instReady();
-        fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
-        Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
-
-        if (needMem) {
-            // If buffer is no longer valid or fetchAddr has moved to point
-            // to the next cache block then start fetch from icache.
-            if (!fetchBufferValid[tid] ||
-                fetchBufferBlockPC != fetchBufferPC[tid])
-                break;
-
-            if (blkOffset >= numInsts) {
-                // We need to process more memory, but we've run out of the
-                // current block.
-                break;
-            }
-
-            decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]);
-
-            if (decoder[tid]->needMoreBytes()) {
-                blkOffset++;
-                fetchAddr += instSize;
-                pcOffset += instSize;
-            }
-        }
-
-        // Extract as many instructions and/or microops as we can from
-        // the memory we've processed so far.
-        do {
-            if (!(curMacroop || inRom)) {
-                if (decoder[tid]->instReady()) {
-                    staticInst = decoder[tid]->decode(thisPC);
-
-                    // Increment stat of fetched instructions.
-                    ++fetchStats.insts;
-
-                    if (staticInst->isMacroop()) {
-                        curMacroop = staticInst;
-                    } else {
-                        pcOffset = 0;
-                    }
-                } else {
-                    // We need more bytes for this instruction so blkOffset and
-                    // pcOffset will be updated
-                    break;
-                }
-            }
-            // Whether we're moving to a new macroop because we're at the
-            // end of the current one, or the branch predictor incorrectly
-            // thinks we are...
-            bool newMacro = false;
-            if (curMacroop || inRom) {
-                if (inRom) {
-                    staticInst = decoder[tid]->fetchRomMicroop(
-                            thisPC.microPC(), curMacroop);
-                } else {
-                    staticInst = curMacroop->fetchMicroop(thisPC.microPC());
-                }
-                newMacro |= staticInst->isLastMicroop();
-            }
-
-            O3DynInstPtr instruction =
-                buildInst(tid, staticInst, curMacroop,
-                          thisPC, nextPC, true);
-
-            ppFetch->notify(instruction);
-            numInst++;
-
-#if TRACING_ON
-            if (Debug::O3PipeView) {
-                instruction->fetchTick = curTick();
-            }
-#endif
-
-            nextPC = thisPC;
-
-            // If we're branching after this instruction, quit fetching
-            // from the same block.
-            predictedBranch |= thisPC.branching();
-            predictedBranch |=
-                lookupAndUpdateNextPC(instruction, nextPC);
-            if (predictedBranch) {
-                DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);
-            }
-
-            newMacro |= thisPC.instAddr() != nextPC.instAddr();
-
-            // Move to the next instruction, unless we have a branch.
-            thisPC = nextPC;
-            inRom = isRomMicroPC(thisPC.microPC());
-
-            if (newMacro) {
-                fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;
-                blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;
-                pcOffset = 0;
-                curMacroop = NULL;
-            }
-
-            if (instruction->isQuiesce()) {
-                DPRINTF(Fetch,
-                        "Quiesce instruction encountered, halting fetch!\n");
-                fetchStatus[tid] = QuiescePending;
-                status_change = true;
-                quiesce = true;
-                break;
-            }
-        } while ((curMacroop || decoder[tid]->instReady()) &&
-                 numInst < fetchWidth &&
-                 fetchQueue[tid].size() < fetchQueueSize);
-
-        // Re-evaluate whether the next instruction to fetch is in micro-op ROM
-        // or not.
-        inRom = isRomMicroPC(thisPC.microPC());
-    }
-
-    if (predictedBranch) {
-        DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch "
-                "instruction encountered.\n", tid);
-    } else if (numInst >= fetchWidth) {
-        DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth "
-                "for this cycle.\n", tid);
-    } else if (blkOffset >= fetchBufferSize) {
-        DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the"
-                "fetch buffer.\n", tid);
-    }
-
-    macroop[tid] = curMacroop;
-    fetchOffset[tid] = pcOffset;
-
-    if (numInst > 0) {
-        wroteToTimeBuffer = true;
-    }
-
-    pc[tid] = thisPC;
-
-    // pipeline a fetch if we're crossing a fetch buffer boundary and not in
-    // a state that would preclude fetching
-    fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
-    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
-    issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&
-        fetchStatus[tid] != IcacheWaitResponse &&
-        fetchStatus[tid] != ItlbWait &&
-        fetchStatus[tid] != IcacheWaitRetry &&
-        fetchStatus[tid] != QuiescePending &&
-        !curMacroop;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::recvReqRetry()
-{
-    if (retryPkt != NULL) {
-        assert(cacheBlocked);
-        assert(retryTid != InvalidThreadID);
-        assert(fetchStatus[retryTid] == IcacheWaitRetry);
-
-        if (icachePort.sendTimingReq(retryPkt)) {
-            fetchStatus[retryTid] = IcacheWaitResponse;
-            // Notify Fetch Request probe when a retryPkt is successfully sent.
-            // Note that notify must be called before retryPkt is set to NULL.
-            ppFetchRequestSent->notify(retryPkt->req);
-            retryPkt = NULL;
-            retryTid = InvalidThreadID;
-            cacheBlocked = false;
-        }
-    } else {
-        assert(retryTid == InvalidThreadID);
-        // Access has been squashed since it was sent out.  Just clear
-        // the cache being blocked.
-        cacheBlocked = false;
-    }
-}
-
-///////////////////////////////////////
-//                                   //
-//  SMT FETCH POLICY MAINTAINED HERE //
-//                                   //
-///////////////////////////////////////
-template<class Impl>
-ThreadID
-DefaultFetch<Impl>::getFetchingThread()
-{
-    if (numThreads > 1) {
-        switch (fetchPolicy) {
-          case SMTFetchPolicy::RoundRobin:
-            return roundRobin();
-          case SMTFetchPolicy::IQCount:
-            return iqCount();
-          case SMTFetchPolicy::LSQCount:
-            return lsqCount();
-          case SMTFetchPolicy::Branch:
-            return branchCount();
-          default:
-            return InvalidThreadID;
-        }
-    } else {
-        std::list<ThreadID>::iterator thread = activeThreads->begin();
-        if (thread == activeThreads->end()) {
-            return InvalidThreadID;
-        }
-
-        ThreadID tid = *thread;
-
-        if (fetchStatus[tid] == Running ||
-            fetchStatus[tid] == IcacheAccessComplete ||
-            fetchStatus[tid] == Idle) {
-            return tid;
-        } else {
-            return InvalidThreadID;
-        }
-    }
-}
-
-
-template<class Impl>
-ThreadID
-DefaultFetch<Impl>::roundRobin()
-{
-    std::list<ThreadID>::iterator pri_iter = priorityList.begin();
-    std::list<ThreadID>::iterator end      = priorityList.end();
-
-    ThreadID high_pri;
-
-    while (pri_iter != end) {
-        high_pri = *pri_iter;
-
-        assert(high_pri <= numThreads);
-
-        if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheAccessComplete ||
-            fetchStatus[high_pri] == Idle) {
-
-            priorityList.erase(pri_iter);
-            priorityList.push_back(high_pri);
-
-            return high_pri;
-        }
-
-        pri_iter++;
-    }
-
-    return InvalidThreadID;
-}
-
-template<class Impl>
-ThreadID
-DefaultFetch<Impl>::iqCount()
-{
-    //sorted from lowest->highest
-    std::priority_queue<unsigned, std::vector<unsigned>,
-                        std::greater<unsigned> > PQ;
-    std::map<unsigned, ThreadID> threadMap;
-
-    std::list<ThreadID>::iterator threads = activeThreads->begin();
-    std::list<ThreadID>::iterator end = activeThreads->end();
-
-    while (threads != end) {
-        ThreadID tid = *threads++;
-        unsigned iqCount = fromIEW->iewInfo[tid].iqCount;
-
-        //we can potentially get tid collisions if two threads
-        //have the same iqCount, but this should be rare.
-        PQ.push(iqCount);
-        threadMap[iqCount] = tid;
-    }
-
-    while (!PQ.empty()) {
-        ThreadID high_pri = threadMap[PQ.top()];
-
-        if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheAccessComplete ||
-            fetchStatus[high_pri] == Idle)
-            return high_pri;
-        else
-            PQ.pop();
-
-    }
-
-    return InvalidThreadID;
-}
-
-template<class Impl>
-ThreadID
-DefaultFetch<Impl>::lsqCount()
-{
-    //sorted from lowest->highest
-    std::priority_queue<unsigned, std::vector<unsigned>,
-                        std::greater<unsigned> > PQ;
-    std::map<unsigned, ThreadID> threadMap;
-
-    std::list<ThreadID>::iterator threads = activeThreads->begin();
-    std::list<ThreadID>::iterator end = activeThreads->end();
-
-    while (threads != end) {
-        ThreadID tid = *threads++;
-        unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;
-
-        //we can potentially get tid collisions if two threads
-        //have the same iqCount, but this should be rare.
-        PQ.push(ldstqCount);
-        threadMap[ldstqCount] = tid;
-    }
-
-    while (!PQ.empty()) {
-        ThreadID high_pri = threadMap[PQ.top()];
-
-        if (fetchStatus[high_pri] == Running ||
-            fetchStatus[high_pri] == IcacheAccessComplete ||
-            fetchStatus[high_pri] == Idle)
-            return high_pri;
-        else
-            PQ.pop();
-    }
-
-    return InvalidThreadID;
-}
-
-template<class Impl>
-ThreadID
-DefaultFetch<Impl>::branchCount()
-{
-    panic("Branch Count Fetch policy unimplemented\n");
-    return InvalidThreadID;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)
-{
-    if (!issuePipelinedIfetch[tid]) {
-        return;
-    }
-
-    // The next PC to access.
-    TheISA::PCState thisPC = pc[tid];
-
-    if (isRomMicroPC(thisPC.microPC())) {
-        return;
-    }
-
-    Addr pcOffset = fetchOffset[tid];
-    Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;
-
-    // Align the fetch PC so its at the start of a fetch buffer segment.
-    Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);
-
-    // Unless buffer already got the block, fetch it from icache.
-    if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {
-        DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, "
-                "starting at PC %s.\n", tid, thisPC);
-
-        fetchCacheLine(fetchAddr, tid, thisPC.instAddr());
-    }
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::profileStall(ThreadID tid) {
-    DPRINTF(Fetch,"There are no more threads available to fetch from.\n");
-
-    // @todo Per-thread stats
-
-    if (stalls[tid].drain) {
-        ++fetchStats.pendingDrainCycles;
-        DPRINTF(Fetch, "Fetch is waiting for a drain!\n");
-    } else if (activeThreads->empty()) {
-        ++fetchStats.noActiveThreadStallCycles;
-        DPRINTF(Fetch, "Fetch has no active thread!\n");
-    } else if (fetchStatus[tid] == Blocked) {
-        ++fetchStats.blockedCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid);
-    } else if (fetchStatus[tid] == Squashing) {
-        ++fetchStats.squashCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
-    } else if (fetchStatus[tid] == IcacheWaitResponse) {
-        ++fetchStats.icacheStallCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
-                tid);
-    } else if (fetchStatus[tid] == ItlbWait) {
-        ++fetchStats.tlbCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to "
-                "finish!\n", tid);
-    } else if (fetchStatus[tid] == TrapPending) {
-        ++fetchStats.pendingTrapStallCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n",
-                tid);
-    } else if (fetchStatus[tid] == QuiescePending) {
-        ++fetchStats.pendingQuiesceStallCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce "
-                "instruction!\n", tid);
-    } else if (fetchStatus[tid] == IcacheWaitRetry) {
-        ++fetchStats.icacheWaitRetryStallCycles;
-        DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n",
-                tid);
-    } else if (fetchStatus[tid] == NoGoodAddr) {
-            DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n",
-                    tid);
-    } else {
-        DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason "
-            "(Status: %i)\n",
-            tid, fetchStatus[tid]);
-    }
-}
-
-template<class Impl>
-bool
-DefaultFetch<Impl>::IcachePort::recvTimingResp(PacketPtr pkt)
-{
-    DPRINTF(O3CPU, "Fetch unit received timing\n");
-    // We shouldn't ever get a cacheable block in Modified state
-    assert(pkt->req->isUncacheable() ||
-           !(pkt->cacheResponding() && !pkt->hasSharers()));
-    fetch->processCacheCompletion(pkt);
-
-    return true;
-}
-
-template<class Impl>
-void
-DefaultFetch<Impl>::IcachePort::recvReqRetry()
-{
-    fetch->recvReqRetry();
-}
-
-#endif//__CPU_O3_FETCH_IMPL_HH__