From f4a33801671511f8f1a761501d11c42e5c916d76 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 3 Mar 2021 03:12:06 -0800 Subject: [PATCH] cpu: De-templatize the O3 DefaultFetch. Change-Id: I5d4ce7a269c9f1df497003404872a977e7edb575 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42109 Tested-by: kokoro Reviewed-by: Gabe Black Maintainer: Gabe Black --- src/cpu/o3/cpu.hh | 2 +- src/cpu/o3/fetch.cc | 1598 +++++++++++++++++++++++++++++++++++- src/cpu/o3/fetch.hh | 69 +- src/cpu/o3/fetch_impl.hh | 1664 -------------------------------------- 4 files changed, 1622 insertions(+), 1711 deletions(-) delete mode 100644 src/cpu/o3/fetch_impl.hh diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 2b220024d8..fdbd1fffe6 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -491,7 +491,7 @@ class FullO3CPU : public BaseO3CPU protected: /** The fetch stage. */ - DefaultFetch fetch; + DefaultFetch fetch; /** The decode stage. */ DefaultDecode decode; diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc index f027b92847..fe2b416025 100644 --- a/src/cpu/o3/fetch.cc +++ b/src/cpu/o3/fetch.cc @@ -1,5 +1,18 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2010-2014 ARM Limited + * Copyright (c) 2012-2013 AMD + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -26,7 +39,1584 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "cpu/o3/fetch_impl.hh" -#include "cpu/o3/isa_specific.hh" +#include "cpu/o3/fetch.hh" -template class DefaultFetch; +#include +#include +#include +#include +#include + +#include "arch/generic/tlb.hh" +#include "base/random.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "cpu/base.hh" +#include "cpu/exetrace.hh" +#include "cpu/nop_static_inst.hh" +#include "cpu/o3/cpu.hh" +#include "cpu/o3/isa_specific.hh" +#include "cpu/o3/limits.hh" +#include "debug/Activity.hh" +#include "debug/Drain.hh" +#include "debug/Fetch.hh" +#include "debug/O3CPU.hh" +#include "debug/O3PipeView.hh" +#include "mem/packet.hh" +#include "params/DerivO3CPU.hh" +#include "sim/byteswap.hh" +#include "sim/core.hh" +#include "sim/eventq.hh" +#include "sim/full_system.hh" +#include "sim/system.hh" + +DefaultFetch::IcachePort::IcachePort(DefaultFetch *_fetch, + FullO3CPU* _cpu) : + RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch) +{} + + +DefaultFetch::DefaultFetch(FullO3CPU *_cpu, + const DerivO3CPUParams &params) + : fetchPolicy(params.smtFetchPolicy), + cpu(_cpu), + branchPred(nullptr), + decodeToFetchDelay(params.decodeToFetchDelay), + renameToFetchDelay(params.renameToFetchDelay), + iewToFetchDelay(params.iewToFetchDelay), + commitToFetchDelay(params.commitToFetchDelay), + fetchWidth(params.fetchWidth), + decodeWidth(params.decodeWidth), + retryPkt(NULL), + retryTid(InvalidThreadID), + cacheBlkSize(cpu->cacheLineSize()), + fetchBufferSize(params.fetchBufferSize), + fetchBufferMask(fetchBufferSize - 1), + fetchQueueSize(params.fetchQueueSize), + numThreads(params.numThreads), + numFetchingThreads(params.smtNumFetchingThreads), + icachePort(this, _cpu), + finishTranslationEvent(this), fetchStats(_cpu, this) +{ + if (numThreads > O3MaxThreads) + fatal("numThreads (%d) is larger than compiled 
limit (%d),\n" + "\tincrease O3MaxThreads in src/cpu/o3/limits.hh\n", + numThreads, static_cast(O3MaxThreads)); + if (fetchWidth > O3MaxWidth) + fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" + "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n", + fetchWidth, static_cast(O3MaxWidth)); + if (fetchBufferSize > cacheBlkSize) + fatal("fetch buffer size (%u bytes) is greater than the cache " + "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); + if (cacheBlkSize % fetchBufferSize) + fatal("cache block (%u bytes) is not a multiple of the " + "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); + + // Get the size of an instruction. + instSize = sizeof(TheISA::MachInst); + + for (int i = 0; i < O3MaxThreads; i++) { + fetchStatus[i] = Idle; + decoder[i] = nullptr; + pc[i] = 0; + fetchOffset[i] = 0; + macroop[i] = nullptr; + delayedCommit[i] = false; + memReq[i] = nullptr; + stalls[i] = {false, false}; + fetchBuffer[i] = NULL; + fetchBufferPC[i] = 0; + fetchBufferValid[i] = false; + lastIcacheStall[i] = 0; + issuePipelinedIfetch[i] = false; + } + + branchPred = params.branchPred; + + for (ThreadID tid = 0; tid < numThreads; tid++) { + decoder[tid] = new TheISA::Decoder( + dynamic_cast(params.isa[tid])); + // Create space to buffer the cache line data, + // which may not hold the entire cache line. 
+ fetchBuffer[tid] = new uint8_t[fetchBufferSize]; + } +} + +std::string DefaultFetch::name() const { return cpu->name() + ".fetch"; } + +void +DefaultFetch::regProbePoints() +{ + ppFetch = new ProbePointArg(cpu->getProbeManager(), "Fetch"); + ppFetchRequestSent = new ProbePointArg(cpu->getProbeManager(), + "FetchRequest"); + +} + +DefaultFetch::FetchStatGroup::FetchStatGroup( + FullO3CPU *cpu, DefaultFetch *fetch) + : Stats::Group(cpu, "fetch"), + ADD_STAT(icacheStallCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch is stalled on an Icache miss"), + ADD_STAT(insts, Stats::Units::Count::get(), + "Number of instructions fetch has processed"), + ADD_STAT(branches, Stats::Units::Count::get(), + "Number of branches that fetch encountered"), + ADD_STAT(predictedBranches, Stats::Units::Count::get(), + "Number of branches that fetch has predicted taken"), + ADD_STAT(cycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has run and was not squashing or " + "blocked"), + ADD_STAT(squashCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has spent squashing"), + ADD_STAT(tlbCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has spent waiting for tlb"), + ADD_STAT(idleCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch was idle"), + ADD_STAT(blockedCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has spent blocked"), + ADD_STAT(miscStallCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has spent waiting on interrupts, or bad " + "addresses, or out of MSHRs"), + ADD_STAT(pendingDrainCycles, Stats::Units::Cycle::get(), + "Number of cycles fetch has spent waiting on pipes to drain"), + ADD_STAT(noActiveThreadStallCycles, Stats::Units::Cycle::get(), + "Number of stall cycles due to no active thread to fetch from"), + ADD_STAT(pendingTrapStallCycles, Stats::Units::Cycle::get(), + "Number of stall cycles due to pending traps"), + ADD_STAT(pendingQuiesceStallCycles, Stats::Units::Cycle::get(), + "Number of 
stall cycles due to pending quiesce instructions"), + ADD_STAT(icacheWaitRetryStallCycles, Stats::Units::Cycle::get(), + "Number of stall cycles due to full MSHR"), + ADD_STAT(cacheLines, Stats::Units::Count::get(), + "Number of cache lines fetched"), + ADD_STAT(icacheSquashes, Stats::Units::Count::get(), + "Number of outstanding Icache misses that were squashed"), + ADD_STAT(tlbSquashes, Stats::Units::Count::get(), + "Number of outstanding ITLB misses that were squashed"), + ADD_STAT(nisnDist, Stats::Units::Count::get(), + "Number of instructions fetched each cycle (Total)"), + ADD_STAT(idleRate, Stats::Units::Ratio::get(), + "Ratio of cycles fetch was idle", + idleCycles / cpu->baseStats.numCycles), + ADD_STAT(branchRate, Stats::Units::Ratio::get(), + "Number of branch fetches per cycle", + branches / cpu->baseStats.numCycles), + ADD_STAT(rate, Stats::Units::Rate< + Stats::Units::Count, Stats::Units::Cycle>::get(), + "Number of inst fetches per cycle", + insts / cpu->baseStats.numCycles) +{ + icacheStallCycles + .prereq(icacheStallCycles); + insts + .prereq(insts); + branches + .prereq(branches); + predictedBranches + .prereq(predictedBranches); + cycles + .prereq(cycles); + squashCycles + .prereq(squashCycles); + tlbCycles + .prereq(tlbCycles); + idleCycles + .prereq(idleCycles); + blockedCycles + .prereq(blockedCycles); + cacheLines + .prereq(cacheLines); + miscStallCycles + .prereq(miscStallCycles); + pendingDrainCycles + .prereq(pendingDrainCycles); + noActiveThreadStallCycles + .prereq(noActiveThreadStallCycles); + pendingTrapStallCycles + .prereq(pendingTrapStallCycles); + pendingQuiesceStallCycles + .prereq(pendingQuiesceStallCycles); + icacheWaitRetryStallCycles + .prereq(icacheWaitRetryStallCycles); + icacheSquashes + .prereq(icacheSquashes); + tlbSquashes + .prereq(tlbSquashes); + nisnDist + .init(/* base value */ 0, + /* last value */ fetch->fetchWidth, + /* bucket size */ 1) + .flags(Stats::pdf); + idleRate + .prereq(idleRate); + branchRate + 
.flags(Stats::total); + rate + .flags(Stats::total); +} +void +DefaultFetch::setTimeBuffer(TimeBuffer *time_buffer) +{ + timeBuffer = time_buffer; + + // Create wires to get information from proper places in time buffer. + fromDecode = timeBuffer->getWire(-decodeToFetchDelay); + fromRename = timeBuffer->getWire(-renameToFetchDelay); + fromIEW = timeBuffer->getWire(-iewToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +void +DefaultFetch::setActiveThreads(std::list *at_ptr) +{ + activeThreads = at_ptr; +} + +void +DefaultFetch::setFetchQueue(TimeBuffer *ftb_ptr) +{ + // Create wire to write information to proper place in fetch time buf. + toDecode = ftb_ptr->getWire(0); +} + +void +DefaultFetch::startupStage() +{ + assert(priorityList.empty()); + resetStage(); + + // Fetch needs to start fetching instructions at the very beginning, + // so it must start up in active state. + switchToActive(); +} + +void +DefaultFetch::clearStates(ThreadID tid) +{ + fetchStatus[tid] = Running; + pc[tid] = cpu->pcState(tid); + fetchOffset[tid] = 0; + macroop[tid] = NULL; + delayedCommit[tid] = false; + memReq[tid] = NULL; + stalls[tid].decode = false; + stalls[tid].drain = false; + fetchBufferPC[tid] = 0; + fetchBufferValid[tid] = false; + fetchQueue[tid].clear(); + + // TODO not sure what to do with priorityList for now + // priorityList.push_back(tid); +} + +void +DefaultFetch::resetStage() +{ + numInst = 0; + interruptPending = false; + cacheBlocked = false; + + priorityList.clear(); + + // Setup PC and nextPC with initial state. 
+ for (ThreadID tid = 0; tid < numThreads; ++tid) { + fetchStatus[tid] = Running; + pc[tid] = cpu->pcState(tid); + fetchOffset[tid] = 0; + macroop[tid] = NULL; + + delayedCommit[tid] = false; + memReq[tid] = NULL; + + stalls[tid].decode = false; + stalls[tid].drain = false; + + fetchBufferPC[tid] = 0; + fetchBufferValid[tid] = false; + + fetchQueue[tid].clear(); + + priorityList.push_back(tid); + } + + wroteToTimeBuffer = false; + _status = Inactive; +} + +void +DefaultFetch::processCacheCompletion(PacketPtr pkt) +{ + ThreadID tid = cpu->contextToThread(pkt->req->contextId()); + + DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid); + assert(!cpu->switchedOut()); + + // Only change the status if it's still waiting on the icache access + // to return. + if (fetchStatus[tid] != IcacheWaitResponse || + pkt->req != memReq[tid]) { + ++fetchStats.icacheSquashes; + delete pkt; + return; + } + + memcpy(fetchBuffer[tid], pkt->getConstPtr(), fetchBufferSize); + fetchBufferValid[tid] = true; + + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); + + DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + + // Only switch to IcacheAccessComplete if we're not stalled as well. + if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheAccessComplete; + } + + pkt->req->setAccessLatency(); + cpu->ppInstAccessComplete->notify(pkt); + // Reset the mem req to NULL. 
+ delete pkt; + memReq[tid] = NULL; +} + +void +DefaultFetch::drainResume() +{ + for (ThreadID i = 0; i < numThreads; ++i) { + stalls[i].decode = false; + stalls[i].drain = false; + } +} + +void +DefaultFetch::drainSanityCheck() const +{ + assert(isDrained()); + assert(retryPkt == NULL); + assert(retryTid == InvalidThreadID); + assert(!cacheBlocked); + assert(!interruptPending); + + for (ThreadID i = 0; i < numThreads; ++i) { + assert(!memReq[i]); + assert(fetchStatus[i] == Idle || stalls[i].drain); + } + + branchPred->drainSanityCheck(); +} + +bool +DefaultFetch::isDrained() const +{ + /* Make sure that threads are either idle or that the commit stage + * has signaled that draining has completed by setting the drain + * stall flag. This effectively forces the pipeline to be disabled + * until the whole system is drained (simulation may continue to + * drain other components). + */ + for (ThreadID i = 0; i < numThreads; ++i) { + // Verify fetch queues are drained + if (!fetchQueue[i].empty()) + return false; + + // Return false if not idle or drain stalled + if (fetchStatus[i] != Idle) { + if (fetchStatus[i] == Blocked && stalls[i].drain) + continue; + else + return false; + } + } + + /* The pipeline might start up again in the middle of the drain + * cycle if the finish translation event is scheduled, so make + * sure that's not the case. + */ + return !finishTranslationEvent.scheduled(); +} + +void +DefaultFetch::takeOverFrom() +{ + assert(cpu->getInstPort().isConnected()); + resetStage(); + +} + +void +DefaultFetch::drainStall(ThreadID tid) +{ + assert(cpu->isDraining()); + assert(!stalls[tid].drain); + DPRINTF(Drain, "%i: Thread drained.\n", tid); + stalls[tid].drain = true; +} + +void +DefaultFetch::wakeFromQuiesce() +{ + DPRINTF(Fetch, "Waking up from quiesce\n"); + // Hopefully this is safe + // @todo: Allow other threads to wake from quiesce. 
+ fetchStatus[0] = Running; +} + +void +DefaultFetch::switchToActive() +{ + if (_status == Inactive) { + DPRINTF(Activity, "Activating stage.\n"); + + cpu->activateStage(FullO3CPU::FetchIdx); + + _status = Active; + } +} + +void +DefaultFetch::switchToInactive() +{ + if (_status == Active) { + DPRINTF(Activity, "Deactivating stage.\n"); + + cpu->deactivateStage(FullO3CPU::FetchIdx); + + _status = Inactive; + } +} + +void +DefaultFetch::deactivateThread(ThreadID tid) +{ + // Update priority list + auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); + if (thread_it != priorityList.end()) { + priorityList.erase(thread_it); + } +} + +bool +DefaultFetch::lookupAndUpdateNextPC(const O3DynInstPtr &inst, + TheISA::PCState &nextPC) +{ + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. + bool predict_taken; + + if (!inst->isControl()) { + inst->staticInst->advancePC(nextPC); + inst->setPredTarg(nextPC); + inst->setPredTaken(false); + return false; + } + + ThreadID tid = inst->threadNumber; + predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, + nextPC, tid); + + if (predict_taken) { + DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " + "predicted to be taken to %s\n", + tid, inst->seqNum, inst->pcState().instAddr(), nextPC); + } else { + DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " + "predicted to be not taken\n", + tid, inst->seqNum, inst->pcState().instAddr()); + } + + DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " + "predicted to go to %s\n", + tid, inst->seqNum, inst->pcState().instAddr(), nextPC); + inst->setPredTarg(nextPC); + inst->setPredTaken(predict_taken); + + ++fetchStats.branches; + + if (predict_taken) { + ++fetchStats.predictedBranches; + } + + return predict_taken; +} + +bool +DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) +{ + Fault fault = NoFault; + + assert(!cpu->switchedOut()); + + // @todo: not 
sure if these should block translation. + //AlphaDep + if (cacheBlocked) { + DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", + tid); + return false; + } else if (checkInterrupt(pc) && !delayedCommit[tid]) { + // Hold off fetch from getting new instructions when: + // Cache is blocked, or + // while an interrupt is pending and we're not in PAL mode, or + // fetch is switched out. + DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", + tid); + return false; + } + + // Align the fetch address to the start of a fetch buffer segment. + Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); + + DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", + tid, fetchBufferBlockPC, vaddr); + + // Setup the memReq to do a read of the first instruction's address. + // Set the appropriate read size and flags as well. + // Build request here. + RequestPtr mem_req = std::make_shared( + fetchBufferBlockPC, fetchBufferSize, + Request::INST_FETCH, cpu->instRequestorId(), pc, + cpu->thread[tid]->contextId()); + + mem_req->taskId(cpu->taskId()); + + memReq[tid] = mem_req; + + // Initiate translation of the icache block + fetchStatus[tid] = ItlbWait; + FetchTranslation *trans = new FetchTranslation(this); + cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(), + trans, BaseTLB::Execute); + return true; +} + +void +DefaultFetch::finishTranslation(const Fault &fault, const RequestPtr &mem_req) +{ + ThreadID tid = cpu->contextToThread(mem_req->contextId()); + Addr fetchBufferBlockPC = mem_req->getVaddr(); + + assert(!cpu->switchedOut()); + + // Wake up CPU if it was idle + cpu->wakeCPU(); + + if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || + mem_req->getVaddr() != memReq[tid]->getVaddr()) { + DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", + tid); + ++fetchStats.tlbSquashes; + return; + } + + + // If translation was successful, attempt to read the icache block. 
+ if (fault == NoFault) { + // Check that we're not going off into random memory + // If we have, just wait around for commit to squash something and put + // us on the right track + if (!cpu->system->isMemAddr(mem_req->getPaddr())) { + warn("Address %#x is outside of physical memory, stopping fetch\n", + mem_req->getPaddr()); + fetchStatus[tid] = NoGoodAddr; + memReq[tid] = NULL; + return; + } + + // Build packet here. + PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); + data_pkt->dataDynamic(new uint8_t[fetchBufferSize]); + + fetchBufferPC[tid] = fetchBufferBlockPC; + fetchBufferValid[tid] = false; + DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); + + fetchStats.cacheLines++; + + // Access the cache. + if (!icachePort.sendTimingReq(data_pkt)) { + assert(retryPkt == NULL); + assert(retryTid == InvalidThreadID); + DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); + + fetchStatus[tid] = IcacheWaitRetry; + retryPkt = data_pkt; + retryTid = tid; + cacheBlocked = true; + } else { + DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid); + DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache " + "response.\n", tid); + lastIcacheStall[tid] = curTick(); + fetchStatus[tid] = IcacheWaitResponse; + // Notify Fetch Request probe when a packet containing a fetch + // request is successfully sent + ppFetchRequestSent->notify(mem_req); + } + } else { + // Don't send an instruction to decode if we can't handle it. + if (!(numInst < fetchWidth) || + !(fetchQueue[tid].size() < fetchQueueSize)) { + assert(!finishTranslationEvent.scheduled()); + finishTranslationEvent.setFault(fault); + finishTranslationEvent.setReq(mem_req); + cpu->schedule(finishTranslationEvent, + cpu->clockEdge(Cycles(1))); + return; + } + DPRINTF(Fetch, + "[tid:%i] Got back req with addr %#x but expected %#x\n", + tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); + // Translation faulted, icache request won't be sent. + memReq[tid] = NULL; + + // Send the fault to commit. 
This thread will not do anything + // until commit handles the fault. The only other way it can + // wake up is if a squash comes along and changes the PC. + TheISA::PCState fetchPC = pc[tid]; + + DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid); + // We will use a nop in order to carry the fault. + O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr, + fetchPC, fetchPC, false); + instruction->setNotAnInst(); + + instruction->setPredTarg(fetchPC); + instruction->fault = fault; + wroteToTimeBuffer = true; + + DPRINTF(Activity, "Activity this cycle.\n"); + cpu->activityThisCycle(); + + fetchStatus[tid] = TrapPending; + + DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid); + DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n", + tid, fault->name(), pc[tid]); + } + _status = updateFetchStatus(); +} + +void +DefaultFetch::doSquash(const TheISA::PCState &newPC, + const O3DynInstPtr squashInst, ThreadID tid) +{ + DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n", + tid, newPC); + + pc[tid] = newPC; + fetchOffset[tid] = 0; + if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) + macroop[tid] = squashInst->macroop; + else + macroop[tid] = NULL; + decoder[tid]->reset(); + + // Clear the icache miss if it's outstanding. + if (fetchStatus[tid] == IcacheWaitResponse) { + DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n", + tid); + memReq[tid] = NULL; + } else if (fetchStatus[tid] == ItlbWait) { + DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n", + tid); + memReq[tid] = NULL; + } + + // Get rid of the retrying packet if it was from this thread. 
+ if (retryTid == tid) { + assert(cacheBlocked); + if (retryPkt) { + delete retryPkt; + } + retryPkt = NULL; + retryTid = InvalidThreadID; + } + + fetchStatus[tid] = Squashing; + + // Empty fetch queue + fetchQueue[tid].clear(); + + // microops are being squashed, it is not known whether the + // youngest non-squashed microop was marked delayed commit + // or not. Setting the flag to true ensures that the + // interrupts are not handled when they cannot be, though + // some opportunities to handle interrupts may be missed. + delayedCommit[tid] = true; + + ++fetchStats.squashCycles; +} + +void +DefaultFetch::squashFromDecode(const TheISA::PCState &newPC, + const O3DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid) +{ + DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid); + + doSquash(newPC, squashInst, tid); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and decode. + cpu->removeInstsUntil(seq_num, tid); +} + +bool +DefaultFetch::checkStall(ThreadID tid) const +{ + bool ret_val = false; + + if (stalls[tid].drain) { + assert(cpu->isDraining()); + DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid); + ret_val = true; + } + + return ret_val; +} + +DefaultFetch::FetchStatus +DefaultFetch::updateFetchStatus() +{ + //Check Running + std::list::iterator threads = activeThreads->begin(); + std::list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (fetchStatus[tid] == Running || + fetchStatus[tid] == Squashing || + fetchStatus[tid] == IcacheAccessComplete) { + + if (_status == Inactive) { + DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid); + + if (fetchStatus[tid] == IcacheAccessComplete) { + DPRINTF(Activity, "[tid:%i] Activating fetch due to cache" + "completion\n",tid); + } + + cpu->activateStage(FullO3CPU::FetchIdx); + } + + return Active; + } + } + + // Stage is switching from active to inactive, notify CPU of it. 
+ if (_status == Active) { + DPRINTF(Activity, "Deactivating stage.\n"); + + cpu->deactivateStage(FullO3CPU::FetchIdx); + } + + return Inactive; +} + +void +DefaultFetch::squash(const TheISA::PCState &newPC, const InstSeqNum seq_num, + O3DynInstPtr squashInst, ThreadID tid) +{ + DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid); + + doSquash(newPC, squashInst, tid); + + // Tell the CPU to remove any instructions that are not in the ROB. + cpu->removeInstsNotInROB(tid); +} + +void +DefaultFetch::tick() +{ + std::list::iterator threads = activeThreads->begin(); + std::list::iterator end = activeThreads->end(); + bool status_change = false; + + wroteToTimeBuffer = false; + + for (ThreadID i = 0; i < numThreads; ++i) { + issuePipelinedIfetch[i] = false; + } + + while (threads != end) { + ThreadID tid = *threads++; + + // Check the signals for each thread to determine the proper status + // for each thread. + bool updated_status = checkSignalsAndUpdate(tid); + status_change = status_change || updated_status; + } + + DPRINTF(Fetch, "Running stage.\n"); + + if (FullSystem) { + if (fromCommit->commitInfo[0].interruptPending) { + interruptPending = true; + } + + if (fromCommit->commitInfo[0].clearInterrupt) { + interruptPending = false; + } + } + + for (threadFetched = 0; threadFetched < numFetchingThreads; + threadFetched++) { + // Fetch each of the actively fetching threads. + fetch(status_change); + } + + // Record number of instructions fetched this cycle for distribution. + fetchStats.nisnDist.sample(numInst); + + if (status_change) { + // Change the fetch stage status if there was a status change. + _status = updateFetchStatus(); + } + + // Issue the next I-cache request if possible. + for (ThreadID i = 0; i < numThreads; ++i) { + if (issuePipelinedIfetch[i]) { + pipelineIcacheAccesses(i); + } + } + + // Send instructions enqueued into the fetch queue to decode. + // Limit rate by fetchWidth. Stall if decode is stalled. 
+ unsigned insts_to_decode = 0; + unsigned available_insts = 0; + + for (auto tid : *activeThreads) { + if (!stalls[tid].decode) { + available_insts += fetchQueue[tid].size(); + } + } + + // Pick a random thread to start trying to grab instructions from + auto tid_itr = activeThreads->begin(); + std::advance(tid_itr, + random_mt.random(0, activeThreads->size() - 1)); + + while (available_insts != 0 && insts_to_decode < decodeWidth) { + ThreadID tid = *tid_itr; + if (!stalls[tid].decode && !fetchQueue[tid].empty()) { + const auto& inst = fetchQueue[tid].front(); + toDecode->insts[toDecode->size++] = inst; + DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode " + "from fetch queue. Fetch queue size: %i.\n", + tid, inst->seqNum, fetchQueue[tid].size()); + + wroteToTimeBuffer = true; + fetchQueue[tid].pop_front(); + insts_to_decode++; + available_insts--; + } + + tid_itr++; + // Wrap around if at end of active threads list + if (tid_itr == activeThreads->end()) + tid_itr = activeThreads->begin(); + } + + // If there was activity this cycle, inform the CPU of it. + if (wroteToTimeBuffer) { + DPRINTF(Activity, "Activity this cycle.\n"); + cpu->activityThisCycle(); + } + + // Reset the number of the instruction we've fetched. + numInst = 0; +} + +bool +DefaultFetch::checkSignalsAndUpdate(ThreadID tid) +{ + // Update the per thread stall statuses. + if (fromDecode->decodeBlock[tid]) { + stalls[tid].decode = true; + } + + if (fromDecode->decodeUnblock[tid]) { + assert(stalls[tid].decode); + assert(!fromDecode->decodeBlock[tid]); + stalls[tid].decode = false; + } + + // Check squash signals from commit. + if (fromCommit->commitInfo[tid].squash) { + + DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " + "from commit.\n",tid); + // In any case, squash. 
+ squash(fromCommit->commitInfo[tid].pc, + fromCommit->commitInfo[tid].doneSeqNum, + fromCommit->commitInfo[tid].squashInst, tid); + + // If it was a branch mispredict on a control instruction, update the + // branch predictor with that instruction, otherwise just kill the + // invalid state we generated in after sequence number + if (fromCommit->commitInfo[tid].mispredictInst && + fromCommit->commitInfo[tid].mispredictInst->isControl()) { + branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, + fromCommit->commitInfo[tid].pc, + fromCommit->commitInfo[tid].branchTaken, + tid); + } else { + branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, + tid); + } + + return true; + } else if (fromCommit->commitInfo[tid].doneSeqNum) { + // Update the branch predictor if it wasn't a squashed instruction + // that was broadcasted. + branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); + } + + // Check squash signals from decode. + if (fromDecode->decodeInfo[tid].squash) { + DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " + "from decode.\n",tid); + + // Update the branch predictor. 
+ if (fromDecode->decodeInfo[tid].branchMispredict) { + branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, + fromDecode->decodeInfo[tid].nextPC, + fromDecode->decodeInfo[tid].branchTaken, + tid); + } else { + branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, + tid); + } + + if (fetchStatus[tid] != Squashing) { + + DPRINTF(Fetch, "Squashing from decode with PC = %s\n", + fromDecode->decodeInfo[tid].nextPC); + // Squash unless we're already squashing + squashFromDecode(fromDecode->decodeInfo[tid].nextPC, + fromDecode->decodeInfo[tid].squashInst, + fromDecode->decodeInfo[tid].doneSeqNum, + tid); + + return true; + } + } + + if (checkStall(tid) && + fetchStatus[tid] != IcacheWaitResponse && + fetchStatus[tid] != IcacheWaitRetry && + fetchStatus[tid] != ItlbWait && + fetchStatus[tid] != QuiescePending) { + DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid); + + fetchStatus[tid] = Blocked; + + return true; + } + + if (fetchStatus[tid] == Blocked || + fetchStatus[tid] == Squashing) { + // Switch status to running if fetch isn't being told to block or + // squash this cycle. + DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n", + tid); + + fetchStatus[tid] = Running; + + return true; + } + + // If we've reached this point, we have not gotten any signals that + // cause fetch to change its status. Fetch remains the same as before. + return false; +} + +O3DynInstPtr +DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst, + StaticInstPtr curMacroop, TheISA::PCState thisPC, + TheISA::PCState nextPC, bool trace) +{ + // Get a sequence number. + InstSeqNum seq = cpu->getAndIncrementInstSeq(); + + // Create a new DynInst from the instruction fetched. 
+ O3DynInstPtr instruction = + new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); + instruction->setTid(tid); + + instruction->setThreadState(cpu->thread[tid]); + + DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created " + "[sn:%lli].\n", tid, thisPC.instAddr(), + thisPC.microPC(), seq); + + DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid, + instruction->staticInst-> + disassemble(thisPC.instAddr())); + +#if TRACING_ON + if (trace) { + instruction->traceData = + cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), + instruction->staticInst, thisPC, curMacroop); + } +#else + instruction->traceData = NULL; +#endif + + // Add instruction to the CPU's list of instructions. + instruction->setInstListIt(cpu->addInst(instruction)); + + // Write the instruction to the first slot in the queue + // that heads to decode. + assert(numInst < fetchWidth); + fetchQueue[tid].push_back(instruction); + assert(fetchQueue[tid].size() <= fetchQueueSize); + DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n", + tid, fetchQueue[tid].size(), fetchQueueSize); + //toDecode->insts[toDecode->size++] = instruction; + + // Keep track of if we can take an interrupt at this boundary + delayedCommit[tid] = instruction->isDelayedCommit(); + + return instruction; +} + +void +DefaultFetch::fetch(bool &status_change) +{ + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + ThreadID tid = getFetchingThread(); + + assert(!cpu->switchedOut()); + + if (tid == InvalidThreadID) { + // Breaks looping condition in tick() + threadFetched = numFetchingThreads; + + if (numThreads == 1) { // @todo Per-thread stats + profileStall(0); + } + + return; + } + + DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); + + // The current PC. 
+ TheISA::PCState thisPC = pc[tid]; + + Addr pcOffset = fetchOffset[tid]; + Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + + bool inRom = isRomMicroPC(thisPC.microPC()); + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. + if (fetchStatus[tid] == IcacheAccessComplete) { + DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid); + + fetchStatus[tid] = Running; + status_change = true; + } else if (fetchStatus[tid] == Running) { + // Align the fetch PC so it's at the start of a fetch buffer segment. + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); + + // If buffer is no longer valid or fetchAddr has moved to point + // to the next cache block, AND we have no remaining ucode + // from a macro-op, then start fetch from icache. + if (!(fetchBufferValid[tid] && + fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom && + !macroop[tid]) { + DPRINTF(Fetch, "[tid:%i] Attempting to translate and read " + "instruction, starting at PC %s.\n", tid, thisPC); + + fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); + + if (fetchStatus[tid] == IcacheWaitResponse) + ++fetchStats.icacheStallCycles; + else if (fetchStatus[tid] == ItlbWait) + ++fetchStats.tlbCycles; + else + ++fetchStats.miscStallCycles; + return; + } else if (checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid]) { + // Stall CPU if an interrupt is posted and we're not issuing + // a delayed commit micro-op currently (delayed commit + // instructions are not interruptible by interrupts, only faults) + ++fetchStats.miscStallCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid); + return; + } + } else { + if (fetchStatus[tid] == Idle) { + ++fetchStats.idleCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid); + } + + // Status is Idle, so fetch should do nothing.
+ return; + } + + ++fetchStats.cycles; + + TheISA::PCState nextPC = thisPC; + + StaticInstPtr staticInst = NULL; + StaticInstPtr curMacroop = macroop[tid]; + + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. + + DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to " + "decode.\n", tid); + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predictedBranch = false; + + // Need to halt fetch if quiesce instruction detected + bool quiesce = false; + + TheISA::MachInst *cacheInsts = + reinterpret_cast(fetchBuffer[tid]); + + const unsigned numInsts = fetchBufferSize / instSize; + unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; + + // Loop through instruction memory from the cache. + // Keep issuing while fetchWidth is available and branch is not + // predicted taken + while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize + && !predictedBranch && !quiesce) { + // We need to process more memory if we aren't going to get a + // StaticInst from the rom, the current macroop, or what's already + // in the decoder. + bool needMem = !inRom && !curMacroop && + !decoder[tid]->instReady(); + fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); + + if (needMem) { + // If buffer is no longer valid or fetchAddr has moved to point + // to the next cache block then start fetch from icache. + if (!fetchBufferValid[tid] || + fetchBufferBlockPC != fetchBufferPC[tid]) + break; + + if (blkOffset >= numInsts) { + // We need to process more memory, but we've run out of the + // current block. 
+ break; + } + + decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]); + + if (decoder[tid]->needMoreBytes()) { + blkOffset++; + fetchAddr += instSize; + pcOffset += instSize; + } + } + + // Extract as many instructions and/or microops as we can from + // the memory we've processed so far. + do { + if (!(curMacroop || inRom)) { + if (decoder[tid]->instReady()) { + staticInst = decoder[tid]->decode(thisPC); + + // Increment stat of fetched instructions. + ++fetchStats.insts; + + if (staticInst->isMacroop()) { + curMacroop = staticInst; + } else { + pcOffset = 0; + } + } else { + // We need more bytes for this instruction so blkOffset and + // pcOffset will be updated + break; + } + } + // Whether we're moving to a new macroop because we're at the + // end of the current one, or the branch predictor incorrectly + // thinks we are... + bool newMacro = false; + if (curMacroop || inRom) { + if (inRom) { + staticInst = decoder[tid]->fetchRomMicroop( + thisPC.microPC(), curMacroop); + } else { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + } + newMacro |= staticInst->isLastMicroop(); + } + + O3DynInstPtr instruction = + buildInst(tid, staticInst, curMacroop, + thisPC, nextPC, true); + + ppFetch->notify(instruction); + numInst++; + +#if TRACING_ON + if (Debug::O3PipeView) { + instruction->fetchTick = curTick(); + } +#endif + + nextPC = thisPC; + + // If we're branching after this instruction, quit fetching + // from the same block. + predictedBranch |= thisPC.branching(); + predictedBranch |= + lookupAndUpdateNextPC(instruction, nextPC); + if (predictedBranch) { + DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); + } + + newMacro |= thisPC.instAddr() != nextPC.instAddr(); + + // Move to the next instruction, unless we have a branch. 
+ thisPC = nextPC; + inRom = isRomMicroPC(thisPC.microPC()); + + if (newMacro) { + fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; + blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; + pcOffset = 0; + curMacroop = NULL; + } + + if (instruction->isQuiesce()) { + DPRINTF(Fetch, + "Quiesce instruction encountered, halting fetch!\n"); + fetchStatus[tid] = QuiescePending; + status_change = true; + quiesce = true; + break; + } + } while ((curMacroop || decoder[tid]->instReady()) && + numInst < fetchWidth && + fetchQueue[tid].size() < fetchQueueSize); + + // Re-evaluate whether the next instruction to fetch is in micro-op ROM + // or not. + inRom = isRomMicroPC(thisPC.microPC()); + } + + if (predictedBranch) { + DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch " + "instruction encountered.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (blkOffset >= numInsts) { + // blkOffset counts instSize-sized slots, the same unit as numInsts. + DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the " + "fetch buffer.\n", tid); + } + + macroop[tid] = curMacroop; + fetchOffset[tid] = pcOffset; + + if (numInst > 0) { + wroteToTimeBuffer = true; + } + + pc[tid] = thisPC; + + // pipeline a fetch if we're crossing a fetch buffer boundary and not in + // a state that would preclude fetching + fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); + issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && + fetchStatus[tid] != IcacheWaitResponse && + fetchStatus[tid] != ItlbWait && + fetchStatus[tid] != IcacheWaitRetry && + fetchStatus[tid] != QuiescePending && + !curMacroop; +} + +void +DefaultFetch::recvReqRetry() +{ + if (retryPkt != NULL) { + assert(cacheBlocked); + assert(retryTid != InvalidThreadID); + assert(fetchStatus[retryTid] == IcacheWaitRetry); + + if (icachePort.sendTimingReq(retryPkt)) { + fetchStatus[retryTid] =
IcacheWaitResponse; + // Notify Fetch Request probe when a retryPkt is successfully sent. + // Note that notify must be called before retryPkt is set to NULL. + ppFetchRequestSent->notify(retryPkt->req); + retryPkt = NULL; + retryTid = InvalidThreadID; + cacheBlocked = false; + } + } else { + assert(retryTid == InvalidThreadID); + // Access has been squashed since it was sent out. Just clear + // the cache being blocked. + cacheBlocked = false; + } +} + +/////////////////////////////////////// +// // +// SMT FETCH POLICY MAINTAINED HERE // +// // +/////////////////////////////////////// +// Chooses the thread to fetch from: dispatches on fetchPolicy when +// numThreads > 1, otherwise takes the first active thread if it is +// Running, IcacheAccessComplete or Idle. +ThreadID +DefaultFetch::getFetchingThread() +{ + if (numThreads > 1) { + switch (fetchPolicy) { + case SMTFetchPolicy::RoundRobin: + return roundRobin(); + case SMTFetchPolicy::IQCount: + return iqCount(); + case SMTFetchPolicy::LSQCount: + return lsqCount(); + case SMTFetchPolicy::Branch: + return branchCount(); + default: + return InvalidThreadID; + } + } else { + auto thread = activeThreads->begin(); + if (thread == activeThreads->end()) { + return InvalidThreadID; + } + + ThreadID tid = *thread; + + if (fetchStatus[tid] == Running || + fetchStatus[tid] == IcacheAccessComplete || + fetchStatus[tid] == Idle) { + return tid; + } else { + return InvalidThreadID; + } + } +} + + +// Returns the first thread in priorityList that is Running, +// IcacheAccessComplete or Idle and moves it to the back of the list; +// returns InvalidThreadID if no thread qualifies. +ThreadID +DefaultFetch::roundRobin() +{ + auto pri_iter = priorityList.begin(); + auto end = priorityList.end(); + + ThreadID high_pri; + + while (pri_iter != end) { + high_pri = *pri_iter; + + // Valid thread IDs are 0 .. numThreads - 1. + assert(high_pri < numThreads); + + if (fetchStatus[high_pri] == Running || + fetchStatus[high_pri] == IcacheAccessComplete || + fetchStatus[high_pri] == Idle) { + + priorityList.erase(pri_iter); + priorityList.push_back(high_pri); + + return high_pri; + } + + ++pri_iter; + } + + return InvalidThreadID; +} + +ThreadID +DefaultFetch::iqCount() +{ + //sorted from lowest->highest + std::priority_queue, + std::greater > PQ; + std::map threadMap; + + std::list::iterator threads
= activeThreads->begin(); + std::list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + unsigned iqCount = fromIEW->iewInfo[tid].iqCount; + + //threads with the same iqCount collide in threadMap (the later tid + //overwrites the earlier one), but this should be rare. + PQ.push(iqCount); + threadMap[iqCount] = tid; + } + + while (!PQ.empty()) { + ThreadID high_pri = threadMap[PQ.top()]; + + if (fetchStatus[high_pri] == Running || + fetchStatus[high_pri] == IcacheAccessComplete || + fetchStatus[high_pri] == Idle) + return high_pri; + else + PQ.pop(); + + } + + return InvalidThreadID; +} + +ThreadID +DefaultFetch::lsqCount() +{ + //sorted from lowest->highest + std::priority_queue, + std::greater > PQ; + std::map threadMap; + + std::list::iterator threads = activeThreads->begin(); + std::list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; + + //threads with the same ldstqCount collide in threadMap (the later tid + //overwrites the earlier one), but this should be rare. + PQ.push(ldstqCount); + threadMap[ldstqCount] = tid; + } + + while (!PQ.empty()) { + ThreadID high_pri = threadMap[PQ.top()]; + + if (fetchStatus[high_pri] == Running || + fetchStatus[high_pri] == IcacheAccessComplete || + fetchStatus[high_pri] == Idle) + return high_pri; + else + PQ.pop(); + } + + return InvalidThreadID; +} + +ThreadID +DefaultFetch::branchCount() +{ + panic("Branch Count Fetch policy unimplemented\n"); + return InvalidThreadID; +} + +void +DefaultFetch::pipelineIcacheAccesses(ThreadID tid) +{ + if (!issuePipelinedIfetch[tid]) { + return; + } + + // The next PC to access. + TheISA::PCState thisPC = pc[tid]; + + if (isRomMicroPC(thisPC.microPC())) { + return; + } + + Addr pcOffset = fetchOffset[tid]; + Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; + + // Align the fetch PC so it's at the start of a fetch buffer segment.
+ Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); + + // Unless buffer already got the block, fetch it from icache. + if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { + DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, " + "starting at PC %s.\n", tid, thisPC); + + fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); + } +} + +void +DefaultFetch::profileStall(ThreadID tid) +{ + DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); + + // @todo Per-thread stats + + if (stalls[tid].drain) { + ++fetchStats.pendingDrainCycles; + DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); + } else if (activeThreads->empty()) { + ++fetchStats.noActiveThreadStallCycles; + DPRINTF(Fetch, "Fetch has no active thread!\n"); + } else if (fetchStatus[tid] == Blocked) { + ++fetchStats.blockedCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid); + } else if (fetchStatus[tid] == Squashing) { + ++fetchStats.squashCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitResponse) { + ++fetchStats.icacheStallCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", + tid); + } else if (fetchStatus[tid] == ItlbWait) { + ++fetchStats.tlbCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to " + "finish!\n", tid); + } else if (fetchStatus[tid] == TrapPending) { + ++fetchStats.pendingTrapStallCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n", + tid); + } else if (fetchStatus[tid] == QuiescePending) { + ++fetchStats.pendingQuiesceStallCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce " + "instruction!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitRetry) { + ++fetchStats.icacheWaitRetryStallCycles; + DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n", + tid); + } else if (fetchStatus[tid] == NoGoodAddr) { + DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n", + tid); + } 
else { + DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason " + "(Status: %i)\n", + tid, fetchStatus[tid]); + } +} + +bool +DefaultFetch::IcachePort::recvTimingResp(PacketPtr pkt) +{ + DPRINTF(O3CPU, "Fetch unit received timing\n"); + // We shouldn't ever get a cacheable block in Modified state + assert(pkt->req->isUncacheable() || + !(pkt->cacheResponding() && !pkt->hasSharers())); + fetch->processCacheCompletion(pkt); + + return true; +} + +void +DefaultFetch::IcachePort::recvReqRetry() +{ + fetch->recvReqRetry(); +} diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index ff6ffee119..c8fe6d398e 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -46,6 +46,7 @@ #include "config/the_isa.hh" #include "cpu/o3/comm.hh" #include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/impl.hh" #include "cpu/o3/limits.hh" #include "cpu/pc_event.hh" #include "cpu/pred/bpred_unit.hh" @@ -69,7 +70,6 @@ class FullO3CPU; * It supports the idling functionality of the CPU by indicating to * the CPU when it is active and inactive. */ -template class DefaultFetch { public: @@ -80,13 +80,11 @@ class DefaultFetch { protected: /** Pointer to fetch. */ - DefaultFetch *fetch; + DefaultFetch *fetch; public: /** Default constructor. 
*/ - IcachePort(DefaultFetch *_fetch, FullO3CPU* _cpu) - : RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch) - { } + IcachePort(DefaultFetch *_fetch, FullO3CPU* _cpu); protected: @@ -101,16 +99,12 @@ class DefaultFetch class FetchTranslation : public BaseTLB::Translation { protected: - DefaultFetch *fetch; + DefaultFetch *fetch; public: - FetchTranslation(DefaultFetch *_fetch) - : fetch(_fetch) - {} + FetchTranslation(DefaultFetch *_fetch) : fetch(_fetch) {} - void - markDelayed() - {} + void markDelayed() {} void finish(const Fault &fault, const RequestPtr &req, ThreadContext *tc, @@ -129,33 +123,28 @@ class DefaultFetch class FinishTranslationEvent : public Event { private: - DefaultFetch *fetch; + DefaultFetch *fetch; Fault fault; RequestPtr req; public: - FinishTranslationEvent(DefaultFetch *_fetch) + FinishTranslationEvent(DefaultFetch *_fetch) : fetch(_fetch), req(nullptr) {} - void setFault(Fault _fault) - { - fault = _fault; - } - - void setReq(const RequestPtr &_req) - { - req = _req; - } + void setFault(Fault _fault) { fault = _fault; } + void setReq(const RequestPtr &_req) { req = _req; } /** Process the delayed finish translation */ - void process() + void + process() { assert(fetch->numInst < fetch->fetchWidth); fetch->finishTranslation(fault, req); } - const char *description() const + const char * + description() const { return "FullO3CPU FetchFinishTranslation"; } @@ -208,7 +197,7 @@ class DefaultFetch public: /** DefaultFetch constructor. */ - DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams ¶ms); + DefaultFetch(FullO3CPU *_cpu, const DerivO3CPUParams ¶ms); /** Returns the name of fetch. */ std::string name() const; @@ -273,12 +262,12 @@ class DefaultFetch /** Changes the status of this stage to active, and indicates this * to the CPU. */ - inline void switchToActive(); + void switchToActive(); /** Changes the status of this stage to inactive, and indicates * this to the CPU. 
*/ - inline void switchToInactive(); + void switchToInactive(); /** * Looks up in the branch predictor to see if the next PC should be @@ -308,15 +297,11 @@ class DefaultFetch /** Check if an interrupt is pending and that we need to handle */ - bool - checkInterrupt(Addr pc) - { - return interruptPending; - } + bool checkInterrupt(Addr pc) { return interruptPending; } /** Squashes a specific thread and resets the PC. */ - inline void doSquash(const TheISA::PCState &newPC, - const O3DynInstPtr squashInst, ThreadID tid); + void doSquash(const TheISA::PCState &newPC, + const O3DynInstPtr squashInst, ThreadID tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode @@ -398,26 +383,26 @@ class DefaultFetch private: /** Pointer to the O3CPU. */ - FullO3CPU *cpu; + FullO3CPU *cpu; /** Time buffer interface. */ TimeBuffer *timeBuffer; /** Wire to get decode's information from backwards time buffer. */ - typename TimeBuffer::wire fromDecode; + TimeBuffer::wire fromDecode; /** Wire to get rename's information from backwards time buffer. */ - typename TimeBuffer::wire fromRename; + TimeBuffer::wire fromRename; /** Wire to get iew's information from backwards time buffer. */ - typename TimeBuffer::wire fromIEW; + TimeBuffer::wire fromIEW; /** Wire to get commit's information from backwards time buffer. */ - typename TimeBuffer::wire fromCommit; + TimeBuffer::wire fromCommit; //Might be annoying how this name is different than the queue. /** Wire used to write any information heading to decode. */ - typename TimeBuffer::wire toDecode; + TimeBuffer::wire toDecode; /** BPredUnit. */ BPredUnit *branchPred; @@ -540,7 +525,7 @@ class DefaultFetch protected: struct FetchStatGroup : public Stats::Group { - FetchStatGroup(FullO3CPU *cpu, DefaultFetch *fetch); + FetchStatGroup(FullO3CPU *cpu, DefaultFetch *fetch); // @todo: Consider making these // vectors and tracking on a per thread basis. 
/** Stat for total number of cycles stalled due to an icache miss. */ diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh deleted file mode 100644 index 72b10c5609..0000000000 --- a/src/cpu/o3/fetch_impl.hh +++ /dev/null @@ -1,1664 +0,0 @@ -/* - * Copyright (c) 2010-2014 ARM Limited - * Copyright (c) 2012-2013 AMD - * All rights reserved. - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder. You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_O3_FETCH_IMPL_HH__ -#define __CPU_O3_FETCH_IMPL_HH__ - -#include -#include -#include -#include -#include - -#include "arch/generic/tlb.hh" -#include "base/random.hh" -#include "base/types.hh" -#include "config/the_isa.hh" -#include "cpu/base.hh" -#include "cpu/exetrace.hh" -#include "cpu/nop_static_inst.hh" -#include "cpu/o3/cpu.hh" -#include "cpu/o3/fetch.hh" -#include "cpu/o3/isa_specific.hh" -#include "cpu/o3/limits.hh" -#include "debug/Activity.hh" -#include "debug/Drain.hh" -#include "debug/Fetch.hh" -#include "debug/O3CPU.hh" -#include "debug/O3PipeView.hh" -#include "mem/packet.hh" -#include "params/DerivO3CPU.hh" -#include "sim/byteswap.hh" -#include "sim/core.hh" -#include "sim/eventq.hh" -#include "sim/full_system.hh" -#include "sim/system.hh" - -template -DefaultFetch::DefaultFetch(FullO3CPU *_cpu, - const DerivO3CPUParams ¶ms) - : fetchPolicy(params.smtFetchPolicy), - cpu(_cpu), - branchPred(nullptr), - decodeToFetchDelay(params.decodeToFetchDelay), - renameToFetchDelay(params.renameToFetchDelay), - iewToFetchDelay(params.iewToFetchDelay), - commitToFetchDelay(params.commitToFetchDelay), - fetchWidth(params.fetchWidth), - decodeWidth(params.decodeWidth), - retryPkt(NULL), - retryTid(InvalidThreadID), - cacheBlkSize(cpu->cacheLineSize()), - fetchBufferSize(params.fetchBufferSize), - fetchBufferMask(fetchBufferSize - 1), - fetchQueueSize(params.fetchQueueSize), - numThreads(params.numThreads), - 
numFetchingThreads(params.smtNumFetchingThreads), - icachePort(this, _cpu), - finishTranslationEvent(this), fetchStats(_cpu, this) -{ - if (numThreads > O3MaxThreads) - fatal("numThreads (%d) is larger than compiled limit (%d),\n" - "\tincrease O3MaxThreads in src/cpu/o3/limits.hh\n", - numThreads, static_cast(O3MaxThreads)); - if (fetchWidth > O3MaxWidth) - fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" - "\tincrease O3MaxWidth in src/cpu/o3/limits.hh\n", - fetchWidth, static_cast(O3MaxWidth)); - if (fetchBufferSize > cacheBlkSize) - fatal("fetch buffer size (%u bytes) is greater than the cache " - "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); - if (cacheBlkSize % fetchBufferSize) - fatal("cache block (%u bytes) is not a multiple of the " - "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); - - // Get the size of an instruction. - instSize = sizeof(TheISA::MachInst); - - for (int i = 0; i < O3MaxThreads; i++) { - fetchStatus[i] = Idle; - decoder[i] = nullptr; - pc[i] = 0; - fetchOffset[i] = 0; - macroop[i] = nullptr; - delayedCommit[i] = false; - memReq[i] = nullptr; - stalls[i] = {false, false}; - fetchBuffer[i] = NULL; - fetchBufferPC[i] = 0; - fetchBufferValid[i] = false; - lastIcacheStall[i] = 0; - issuePipelinedIfetch[i] = false; - } - - branchPred = params.branchPred; - - for (ThreadID tid = 0; tid < numThreads; tid++) { - decoder[tid] = new TheISA::Decoder( - dynamic_cast(params.isa[tid])); - // Create space to buffer the cache line data, - // which may not hold the entire cache line. 
- fetchBuffer[tid] = new uint8_t[fetchBufferSize]; - } -} - -template -std::string -DefaultFetch::name() const -{ - return cpu->name() + ".fetch"; -} - -template -void -DefaultFetch::regProbePoints() -{ - ppFetch = new ProbePointArg(cpu->getProbeManager(), "Fetch"); - ppFetchRequestSent = new ProbePointArg(cpu->getProbeManager(), - "FetchRequest"); - -} - -template -DefaultFetch:: -FetchStatGroup::FetchStatGroup(FullO3CPU *cpu, DefaultFetch *fetch) - : Stats::Group(cpu, "fetch"), - ADD_STAT(icacheStallCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch is stalled on an Icache miss"), - ADD_STAT(insts, Stats::Units::Count::get(), - "Number of instructions fetch has processed"), - ADD_STAT(branches, Stats::Units::Count::get(), - "Number of branches that fetch encountered"), - ADD_STAT(predictedBranches, Stats::Units::Count::get(), - "Number of branches that fetch has predicted taken"), - ADD_STAT(cycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has run and was not squashing or " - "blocked"), - ADD_STAT(squashCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has spent squashing"), - ADD_STAT(tlbCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has spent waiting for tlb"), - ADD_STAT(idleCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch was idle"), - ADD_STAT(blockedCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has spent blocked"), - ADD_STAT(miscStallCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has spent waiting on interrupts, or bad " - "addresses, or out of MSHRs"), - ADD_STAT(pendingDrainCycles, Stats::Units::Cycle::get(), - "Number of cycles fetch has spent waiting on pipes to drain"), - ADD_STAT(noActiveThreadStallCycles, Stats::Units::Cycle::get(), - "Number of stall cycles due to no active thread to fetch from"), - ADD_STAT(pendingTrapStallCycles, Stats::Units::Cycle::get(), - "Number of stall cycles due to pending traps"), - ADD_STAT(pendingQuiesceStallCycles, 
Stats::Units::Cycle::get(), - "Number of stall cycles due to pending quiesce instructions"), - ADD_STAT(icacheWaitRetryStallCycles, Stats::Units::Cycle::get(), - "Number of stall cycles due to full MSHR"), - ADD_STAT(cacheLines, Stats::Units::Count::get(), - "Number of cache lines fetched"), - ADD_STAT(icacheSquashes, Stats::Units::Count::get(), - "Number of outstanding Icache misses that were squashed"), - ADD_STAT(tlbSquashes, Stats::Units::Count::get(), - "Number of outstanding ITLB misses that were squashed"), - ADD_STAT(nisnDist, Stats::Units::Count::get(), - "Number of instructions fetched each cycle (Total)"), - ADD_STAT(idleRate, Stats::Units::Ratio::get(), - "Ratio of cycles fetch was idle", - idleCycles / cpu->baseStats.numCycles), - ADD_STAT(branchRate, Stats::Units::Ratio::get(), - "Number of branch fetches per cycle", - branches / cpu->baseStats.numCycles), - ADD_STAT(rate, Stats::Units::Rate< - Stats::Units::Count, Stats::Units::Cycle>::get(), - "Number of inst fetches per cycle", - insts / cpu->baseStats.numCycles) -{ - icacheStallCycles - .prereq(icacheStallCycles); - insts - .prereq(insts); - branches - .prereq(branches); - predictedBranches - .prereq(predictedBranches); - cycles - .prereq(cycles); - squashCycles - .prereq(squashCycles); - tlbCycles - .prereq(tlbCycles); - idleCycles - .prereq(idleCycles); - blockedCycles - .prereq(blockedCycles); - cacheLines - .prereq(cacheLines); - miscStallCycles - .prereq(miscStallCycles); - pendingDrainCycles - .prereq(pendingDrainCycles); - noActiveThreadStallCycles - .prereq(noActiveThreadStallCycles); - pendingTrapStallCycles - .prereq(pendingTrapStallCycles); - pendingQuiesceStallCycles - .prereq(pendingQuiesceStallCycles); - icacheWaitRetryStallCycles - .prereq(icacheWaitRetryStallCycles); - icacheSquashes - .prereq(icacheSquashes); - tlbSquashes - .prereq(tlbSquashes); - nisnDist - .init(/* base value */ 0, - /* last value */ fetch->fetchWidth, - /* bucket size */ 1) - .flags(Stats::pdf); - idleRate - 
.prereq(idleRate); - branchRate - .flags(Stats::total); - rate - .flags(Stats::total); -} -template -void -DefaultFetch::setTimeBuffer(TimeBuffer *time_buffer) -{ - timeBuffer = time_buffer; - - // Create wires to get information from proper places in time buffer. - fromDecode = timeBuffer->getWire(-decodeToFetchDelay); - fromRename = timeBuffer->getWire(-renameToFetchDelay); - fromIEW = timeBuffer->getWire(-iewToFetchDelay); - fromCommit = timeBuffer->getWire(-commitToFetchDelay); -} - -template -void -DefaultFetch::setActiveThreads(std::list *at_ptr) -{ - activeThreads = at_ptr; -} - -template -void -DefaultFetch::setFetchQueue(TimeBuffer *ftb_ptr) -{ - // Create wire to write information to proper place in fetch time buf. - toDecode = ftb_ptr->getWire(0); -} - -template -void -DefaultFetch::startupStage() -{ - assert(priorityList.empty()); - resetStage(); - - // Fetch needs to start fetching instructions at the very beginning, - // so it must start up in active state. - switchToActive(); -} - -template -void -DefaultFetch::clearStates(ThreadID tid) -{ - fetchStatus[tid] = Running; - pc[tid] = cpu->pcState(tid); - fetchOffset[tid] = 0; - macroop[tid] = NULL; - delayedCommit[tid] = false; - memReq[tid] = NULL; - stalls[tid].decode = false; - stalls[tid].drain = false; - fetchBufferPC[tid] = 0; - fetchBufferValid[tid] = false; - fetchQueue[tid].clear(); - - // TODO not sure what to do with priorityList for now - // priorityList.push_back(tid); -} - -template -void -DefaultFetch::resetStage() -{ - numInst = 0; - interruptPending = false; - cacheBlocked = false; - - priorityList.clear(); - - // Setup PC and nextPC with initial state. 
- for (ThreadID tid = 0; tid < numThreads; ++tid) { - fetchStatus[tid] = Running; - pc[tid] = cpu->pcState(tid); - fetchOffset[tid] = 0; - macroop[tid] = NULL; - - delayedCommit[tid] = false; - memReq[tid] = NULL; - - stalls[tid].decode = false; - stalls[tid].drain = false; - - fetchBufferPC[tid] = 0; - fetchBufferValid[tid] = false; - - fetchQueue[tid].clear(); - - priorityList.push_back(tid); - } - - wroteToTimeBuffer = false; - _status = Inactive; -} - -template -void -DefaultFetch::processCacheCompletion(PacketPtr pkt) -{ - ThreadID tid = cpu->contextToThread(pkt->req->contextId()); - - DPRINTF(Fetch, "[tid:%i] Waking up from cache miss.\n", tid); - assert(!cpu->switchedOut()); - - // Only change the status if it's still waiting on the icache access - // to return. - if (fetchStatus[tid] != IcacheWaitResponse || - pkt->req != memReq[tid]) { - ++fetchStats.icacheSquashes; - delete pkt; - return; - } - - memcpy(fetchBuffer[tid], pkt->getConstPtr(), fetchBufferSize); - fetchBufferValid[tid] = true; - - // Wake up the CPU (if it went to sleep and was waiting on - // this completion event). - cpu->wakeCPU(); - - DPRINTF(Activity, "[tid:%i] Activating fetch due to cache completion\n", - tid); - - switchToActive(); - - // Only switch to IcacheAccessComplete if we're not stalled as well. - if (checkStall(tid)) { - fetchStatus[tid] = Blocked; - } else { - fetchStatus[tid] = IcacheAccessComplete; - } - - pkt->req->setAccessLatency(); - cpu->ppInstAccessComplete->notify(pkt); - // Reset the mem req to NULL. 
- delete pkt; - memReq[tid] = NULL; -} - -template -void -DefaultFetch::drainResume() -{ - for (ThreadID i = 0; i < numThreads; ++i) { - stalls[i].decode = false; - stalls[i].drain = false; - } -} - -template -void -DefaultFetch::drainSanityCheck() const -{ - assert(isDrained()); - assert(retryPkt == NULL); - assert(retryTid == InvalidThreadID); - assert(!cacheBlocked); - assert(!interruptPending); - - for (ThreadID i = 0; i < numThreads; ++i) { - assert(!memReq[i]); - assert(fetchStatus[i] == Idle || stalls[i].drain); - } - - branchPred->drainSanityCheck(); -} - -template -bool -DefaultFetch::isDrained() const -{ - /* Make sure that threads are either idle of that the commit stage - * has signaled that draining has completed by setting the drain - * stall flag. This effectively forces the pipeline to be disabled - * until the whole system is drained (simulation may continue to - * drain other components). - */ - for (ThreadID i = 0; i < numThreads; ++i) { - // Verify fetch queues are drained - if (!fetchQueue[i].empty()) - return false; - - // Return false if not idle or drain stalled - if (fetchStatus[i] != Idle) { - if (fetchStatus[i] == Blocked && stalls[i].drain) - continue; - else - return false; - } - } - - /* The pipeline might start up again in the middle of the drain - * cycle if the finish translation event is scheduled, so make - * sure that's not the case. - */ - return !finishTranslationEvent.scheduled(); -} - -template -void -DefaultFetch::takeOverFrom() -{ - assert(cpu->getInstPort().isConnected()); - resetStage(); - -} - -template -void -DefaultFetch::drainStall(ThreadID tid) -{ - assert(cpu->isDraining()); - assert(!stalls[tid].drain); - DPRINTF(Drain, "%i: Thread drained.\n", tid); - stalls[tid].drain = true; -} - -template -void -DefaultFetch::wakeFromQuiesce() -{ - DPRINTF(Fetch, "Waking up from quiesce\n"); - // Hopefully this is safe - // @todo: Allow other threads to wake from quiesce. 
- fetchStatus[0] = Running; -} - -template -inline void -DefaultFetch::switchToActive() -{ - if (_status == Inactive) { - DPRINTF(Activity, "Activating stage.\n"); - - cpu->activateStage(FullO3CPU::FetchIdx); - - _status = Active; - } -} - -template -inline void -DefaultFetch::switchToInactive() -{ - if (_status == Active) { - DPRINTF(Activity, "Deactivating stage.\n"); - - cpu->deactivateStage(FullO3CPU::FetchIdx); - - _status = Inactive; - } -} - -template -void -DefaultFetch::deactivateThread(ThreadID tid) -{ - // Update priority list - auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid); - if (thread_it != priorityList.end()) { - priorityList.erase(thread_it); - } -} - -template -bool -DefaultFetch::lookupAndUpdateNextPC( - const O3DynInstPtr &inst, TheISA::PCState &nextPC) -{ - // Do branch prediction check here. - // A bit of a misnomer...next_PC is actually the current PC until - // this function updates it. - bool predict_taken; - - if (!inst->isControl()) { - inst->staticInst->advancePC(nextPC); - inst->setPredTarg(nextPC); - inst->setPredTaken(false); - return false; - } - - ThreadID tid = inst->threadNumber; - predict_taken = branchPred->predict(inst->staticInst, inst->seqNum, - nextPC, tid); - - if (predict_taken) { - DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " - "predicted to be taken to %s\n", - tid, inst->seqNum, inst->pcState().instAddr(), nextPC); - } else { - DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " - "predicted to be not taken\n", - tid, inst->seqNum, inst->pcState().instAddr()); - } - - DPRINTF(Fetch, "[tid:%i] [sn:%llu] Branch at PC %#x " - "predicted to go to %s\n", - tid, inst->seqNum, inst->pcState().instAddr(), nextPC); - inst->setPredTarg(nextPC); - inst->setPredTaken(predict_taken); - - ++fetchStats.branches; - - if (predict_taken) { - ++fetchStats.predictedBranches; - } - - return predict_taken; -} - -template -bool -DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) -{ - Fault 
fault = NoFault; - - assert(!cpu->switchedOut()); - - // @todo: not sure if these should block translation. - //AlphaDep - if (cacheBlocked) { - DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", - tid); - return false; - } else if (checkInterrupt(pc) && !delayedCommit[tid]) { - // Hold off fetch from getting new instructions when: - // Cache is blocked, or - // while an interrupt is pending and we're not in PAL mode, or - // fetch is switched out. - DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", - tid); - return false; - } - - // Align the fetch address to the start of a fetch buffer segment. - Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); - - DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", - tid, fetchBufferBlockPC, vaddr); - - // Setup the memReq to do a read of the first instruction's address. - // Set the appropriate read size and flags as well. - // Build request here. - RequestPtr mem_req = std::make_shared( - fetchBufferBlockPC, fetchBufferSize, - Request::INST_FETCH, cpu->instRequestorId(), pc, - cpu->thread[tid]->contextId()); - - mem_req->taskId(cpu->taskId()); - - memReq[tid] = mem_req; - - // Initiate translation of the icache block - fetchStatus[tid] = ItlbWait; - FetchTranslation *trans = new FetchTranslation(this); - cpu->mmu->translateTiming(mem_req, cpu->thread[tid]->getTC(), - trans, BaseTLB::Execute); - return true; -} - -template -void -DefaultFetch::finishTranslation(const Fault &fault, - const RequestPtr &mem_req) -{ - ThreadID tid = cpu->contextToThread(mem_req->contextId()); - Addr fetchBufferBlockPC = mem_req->getVaddr(); - - assert(!cpu->switchedOut()); - - // Wake up CPU if it was idle - cpu->wakeCPU(); - - if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] || - mem_req->getVaddr() != memReq[tid]->getVaddr()) { - DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n", - tid); - ++fetchStats.tlbSquashes; - return; - } - - - // If translation was successful, 
attempt to read the icache block. - if (fault == NoFault) { - // Check that we're not going off into random memory - // If we have, just wait around for commit to squash something and put - // us on the right track - if (!cpu->system->isMemAddr(mem_req->getPaddr())) { - warn("Address %#x is outside of physical memory, stopping fetch\n", - mem_req->getPaddr()); - fetchStatus[tid] = NoGoodAddr; - memReq[tid] = NULL; - return; - } - - // Build packet here. - PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); - data_pkt->dataDynamic(new uint8_t[fetchBufferSize]); - - fetchBufferPC[tid] = fetchBufferBlockPC; - fetchBufferValid[tid] = false; - DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); - - fetchStats.cacheLines++; - - // Access the cache. - if (!icachePort.sendTimingReq(data_pkt)) { - assert(retryPkt == NULL); - assert(retryTid == InvalidThreadID); - DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); - - fetchStatus[tid] = IcacheWaitRetry; - retryPkt = data_pkt; - retryTid = tid; - cacheBlocked = true; - } else { - DPRINTF(Fetch, "[tid:%i] Doing Icache access.\n", tid); - DPRINTF(Activity, "[tid:%i] Activity: Waiting on I-cache " - "response.\n", tid); - lastIcacheStall[tid] = curTick(); - fetchStatus[tid] = IcacheWaitResponse; - // Notify Fetch Request probe when a packet containing a fetch - // request is successfully sent - ppFetchRequestSent->notify(mem_req); - } - } else { - // Don't send an instruction to decode if we can't handle it. - if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) { - assert(!finishTranslationEvent.scheduled()); - finishTranslationEvent.setFault(fault); - finishTranslationEvent.setReq(mem_req); - cpu->schedule(finishTranslationEvent, - cpu->clockEdge(Cycles(1))); - return; - } - DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n", - tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); - // Translation faulted, icache request won't be sent. 
- memReq[tid] = NULL; - - // Send the fault to commit. This thread will not do anything - // until commit handles the fault. The only other way it can - // wake up is if a squash comes along and changes the PC. - TheISA::PCState fetchPC = pc[tid]; - - DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid); - // We will use a nop in ordier to carry the fault. - O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr, - fetchPC, fetchPC, false); - instruction->setNotAnInst(); - - instruction->setPredTarg(fetchPC); - instruction->fault = fault; - wroteToTimeBuffer = true; - - DPRINTF(Activity, "Activity this cycle.\n"); - cpu->activityThisCycle(); - - fetchStatus[tid] = TrapPending; - - DPRINTF(Fetch, "[tid:%i] Blocked, need to handle the trap.\n", tid); - DPRINTF(Fetch, "[tid:%i] fault (%s) detected @ PC %s.\n", - tid, fault->name(), pc[tid]); - } - _status = updateFetchStatus(); -} - -template -inline void -DefaultFetch::doSquash(const TheISA::PCState &newPC, - const O3DynInstPtr squashInst, ThreadID tid) -{ - DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n", - tid, newPC); - - pc[tid] = newPC; - fetchOffset[tid] = 0; - if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr()) - macroop[tid] = squashInst->macroop; - else - macroop[tid] = NULL; - decoder[tid]->reset(); - - // Clear the icache miss if it's outstanding. - if (fetchStatus[tid] == IcacheWaitResponse) { - DPRINTF(Fetch, "[tid:%i] Squashing outstanding Icache miss.\n", - tid); - memReq[tid] = NULL; - } else if (fetchStatus[tid] == ItlbWait) { - DPRINTF(Fetch, "[tid:%i] Squashing outstanding ITLB miss.\n", - tid); - memReq[tid] = NULL; - } - - // Get rid of the retrying packet if it was from this thread. 
- if (retryTid == tid) { - assert(cacheBlocked); - if (retryPkt) { - delete retryPkt; - } - retryPkt = NULL; - retryTid = InvalidThreadID; - } - - fetchStatus[tid] = Squashing; - - // Empty fetch queue - fetchQueue[tid].clear(); - - // microops are being squashed, it is not known wheather the - // youngest non-squashed microop was marked delayed commit - // or not. Setting the flag to true ensures that the - // interrupts are not handled when they cannot be, though - // some opportunities to handle interrupts may be missed. - delayedCommit[tid] = true; - - ++fetchStats.squashCycles; -} - -template -void -DefaultFetch::squashFromDecode(const TheISA::PCState &newPC, - const O3DynInstPtr squashInst, - const InstSeqNum seq_num, ThreadID tid) -{ - DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid); - - doSquash(newPC, squashInst, tid); - - // Tell the CPU to remove any instructions that are in flight between - // fetch and decode. - cpu->removeInstsUntil(seq_num, tid); -} - -template -bool -DefaultFetch::checkStall(ThreadID tid) const -{ - bool ret_val = false; - - if (stalls[tid].drain) { - assert(cpu->isDraining()); - DPRINTF(Fetch,"[tid:%i] Drain stall detected.\n",tid); - ret_val = true; - } - - return ret_val; -} - -template -typename DefaultFetch::FetchStatus -DefaultFetch::updateFetchStatus() -{ - //Check Running - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - - if (fetchStatus[tid] == Running || - fetchStatus[tid] == Squashing || - fetchStatus[tid] == IcacheAccessComplete) { - - if (_status == Inactive) { - DPRINTF(Activity, "[tid:%i] Activating stage.\n",tid); - - if (fetchStatus[tid] == IcacheAccessComplete) { - DPRINTF(Activity, "[tid:%i] Activating fetch due to cache" - "completion\n",tid); - } - - cpu->activateStage(FullO3CPU::FetchIdx); - } - - return Active; - } - } - - // Stage is switching from active to inactive, notify CPU of it. 
- if (_status == Active) { - DPRINTF(Activity, "Deactivating stage.\n"); - - cpu->deactivateStage(FullO3CPU::FetchIdx); - } - - return Inactive; -} - -template -void -DefaultFetch::squash(const TheISA::PCState &newPC, - const InstSeqNum seq_num, O3DynInstPtr squashInst, - ThreadID tid) -{ - DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid); - - doSquash(newPC, squashInst, tid); - - // Tell the CPU to remove any instructions that are not in the ROB. - cpu->removeInstsNotInROB(tid); -} - -template -void -DefaultFetch::tick() -{ - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - bool status_change = false; - - wroteToTimeBuffer = false; - - for (ThreadID i = 0; i < numThreads; ++i) { - issuePipelinedIfetch[i] = false; - } - - while (threads != end) { - ThreadID tid = *threads++; - - // Check the signals for each thread to determine the proper status - // for each thread. - bool updated_status = checkSignalsAndUpdate(tid); - status_change = status_change || updated_status; - } - - DPRINTF(Fetch, "Running stage.\n"); - - if (FullSystem) { - if (fromCommit->commitInfo[0].interruptPending) { - interruptPending = true; - } - - if (fromCommit->commitInfo[0].clearInterrupt) { - interruptPending = false; - } - } - - for (threadFetched = 0; threadFetched < numFetchingThreads; - threadFetched++) { - // Fetch each of the actively fetching threads. - fetch(status_change); - } - - // Record number of instructions fetched this cycle for distribution. - fetchStats.nisnDist.sample(numInst); - - if (status_change) { - // Change the fetch stage status if there was a status change. - _status = updateFetchStatus(); - } - - // Issue the next I-cache request if possible. - for (ThreadID i = 0; i < numThreads; ++i) { - if (issuePipelinedIfetch[i]) { - pipelineIcacheAccesses(i); - } - } - - // Send instructions enqueued into the fetch queue to decode. - // Limit rate by fetchWidth. Stall if decode is stalled. 
- unsigned insts_to_decode = 0; - unsigned available_insts = 0; - - for (auto tid : *activeThreads) { - if (!stalls[tid].decode) { - available_insts += fetchQueue[tid].size(); - } - } - - // Pick a random thread to start trying to grab instructions from - auto tid_itr = activeThreads->begin(); - std::advance(tid_itr, random_mt.random(0, activeThreads->size() - 1)); - - while (available_insts != 0 && insts_to_decode < decodeWidth) { - ThreadID tid = *tid_itr; - if (!stalls[tid].decode && !fetchQueue[tid].empty()) { - const auto& inst = fetchQueue[tid].front(); - toDecode->insts[toDecode->size++] = inst; - DPRINTF(Fetch, "[tid:%i] [sn:%llu] Sending instruction to decode " - "from fetch queue. Fetch queue size: %i.\n", - tid, inst->seqNum, fetchQueue[tid].size()); - - wroteToTimeBuffer = true; - fetchQueue[tid].pop_front(); - insts_to_decode++; - available_insts--; - } - - tid_itr++; - // Wrap around if at end of active threads list - if (tid_itr == activeThreads->end()) - tid_itr = activeThreads->begin(); - } - - // If there was activity this cycle, inform the CPU of it. - if (wroteToTimeBuffer) { - DPRINTF(Activity, "Activity this cycle.\n"); - cpu->activityThisCycle(); - } - - // Reset the number of the instruction we've fetched. - numInst = 0; -} - -template -bool -DefaultFetch::checkSignalsAndUpdate(ThreadID tid) -{ - // Update the per thread stall statuses. - if (fromDecode->decodeBlock[tid]) { - stalls[tid].decode = true; - } - - if (fromDecode->decodeUnblock[tid]) { - assert(stalls[tid].decode); - assert(!fromDecode->decodeBlock[tid]); - stalls[tid].decode = false; - } - - // Check squash signals from commit. - if (fromCommit->commitInfo[tid].squash) { - - DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " - "from commit.\n",tid); - // In any case, squash. 
- squash(fromCommit->commitInfo[tid].pc, - fromCommit->commitInfo[tid].doneSeqNum, - fromCommit->commitInfo[tid].squashInst, tid); - - // If it was a branch mispredict on a control instruction, update the - // branch predictor with that instruction, otherwise just kill the - // invalid state we generated in after sequence number - if (fromCommit->commitInfo[tid].mispredictInst && - fromCommit->commitInfo[tid].mispredictInst->isControl()) { - branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, - fromCommit->commitInfo[tid].pc, - fromCommit->commitInfo[tid].branchTaken, - tid); - } else { - branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum, - tid); - } - - return true; - } else if (fromCommit->commitInfo[tid].doneSeqNum) { - // Update the branch predictor if it wasn't a squashed instruction - // that was broadcasted. - branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid); - } - - // Check squash signals from decode. - if (fromDecode->decodeInfo[tid].squash) { - DPRINTF(Fetch, "[tid:%i] Squashing instructions due to squash " - "from decode.\n",tid); - - // Update the branch predictor. 
- if (fromDecode->decodeInfo[tid].branchMispredict) { - branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, - fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].branchTaken, - tid); - } else { - branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum, - tid); - } - - if (fetchStatus[tid] != Squashing) { - - DPRINTF(Fetch, "Squashing from decode with PC = %s\n", - fromDecode->decodeInfo[tid].nextPC); - // Squash unless we're already squashing - squashFromDecode(fromDecode->decodeInfo[tid].nextPC, - fromDecode->decodeInfo[tid].squashInst, - fromDecode->decodeInfo[tid].doneSeqNum, - tid); - - return true; - } - } - - if (checkStall(tid) && - fetchStatus[tid] != IcacheWaitResponse && - fetchStatus[tid] != IcacheWaitRetry && - fetchStatus[tid] != ItlbWait && - fetchStatus[tid] != QuiescePending) { - DPRINTF(Fetch, "[tid:%i] Setting to blocked\n",tid); - - fetchStatus[tid] = Blocked; - - return true; - } - - if (fetchStatus[tid] == Blocked || - fetchStatus[tid] == Squashing) { - // Switch status to running if fetch isn't being told to block or - // squash this cycle. - DPRINTF(Fetch, "[tid:%i] Done squashing, switching to running.\n", - tid); - - fetchStatus[tid] = Running; - - return true; - } - - // If we've reached this point, we have not gotten any signals that - // cause fetch to change its status. Fetch remains the same as before. - return false; -} - -template -O3DynInstPtr -DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst, - StaticInstPtr curMacroop, TheISA::PCState thisPC, - TheISA::PCState nextPC, bool trace) -{ - // Get a sequence number. - InstSeqNum seq = cpu->getAndIncrementInstSeq(); - - // Create a new DynInst from the instruction fetched. 
- O3DynInstPtr instruction = - new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); - instruction->setTid(tid); - - instruction->setThreadState(cpu->thread[tid]); - - DPRINTF(Fetch, "[tid:%i] Instruction PC %#x (%d) created " - "[sn:%lli].\n", tid, thisPC.instAddr(), - thisPC.microPC(), seq); - - DPRINTF(Fetch, "[tid:%i] Instruction is: %s\n", tid, - instruction->staticInst-> - disassemble(thisPC.instAddr())); - -#if TRACING_ON - if (trace) { - instruction->traceData = - cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid), - instruction->staticInst, thisPC, curMacroop); - } -#else - instruction->traceData = NULL; -#endif - - // Add instruction to the CPU's list of instructions. - instruction->setInstListIt(cpu->addInst(instruction)); - - // Write the instruction to the first slot in the queue - // that heads to decode. - assert(numInst < fetchWidth); - fetchQueue[tid].push_back(instruction); - assert(fetchQueue[tid].size() <= fetchQueueSize); - DPRINTF(Fetch, "[tid:%i] Fetch queue entry created (%i/%i).\n", - tid, fetchQueue[tid].size(), fetchQueueSize); - //toDecode->insts[toDecode->size++] = instruction; - - // Keep track of if we can take an interrupt at this boundary - delayedCommit[tid] = instruction->isDelayedCommit(); - - return instruction; -} - -template -void -DefaultFetch::fetch(bool &status_change) -{ - ////////////////////////////////////////// - // Start actual fetch - ////////////////////////////////////////// - ThreadID tid = getFetchingThread(); - - assert(!cpu->switchedOut()); - - if (tid == InvalidThreadID) { - // Breaks looping condition in tick() - threadFetched = numFetchingThreads; - - if (numThreads == 1) { // @todo Per-thread stats - profileStall(0); - } - - return; - } - - DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid); - - // The current PC. 
- TheISA::PCState thisPC = pc[tid]; - - Addr pcOffset = fetchOffset[tid]; - Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - - bool inRom = isRomMicroPC(thisPC.microPC()); - - // If returning from the delay of a cache miss, then update the status - // to running, otherwise do the cache access. Possibly move this up - // to tick() function. - if (fetchStatus[tid] == IcacheAccessComplete) { - DPRINTF(Fetch, "[tid:%i] Icache miss is complete.\n", tid); - - fetchStatus[tid] = Running; - status_change = true; - } else if (fetchStatus[tid] == Running) { - // Align the fetch PC so its at the start of a fetch buffer segment. - Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); - - // If buffer is no longer valid or fetchAddr has moved to point - // to the next cache block, AND we have no remaining ucode - // from a macro-op, then start fetch from icache. - if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid]) - && !inRom && !macroop[tid]) { - DPRINTF(Fetch, "[tid:%i] Attempting to translate and read " - "instruction, starting at PC %s.\n", tid, thisPC); - - fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); - - if (fetchStatus[tid] == IcacheWaitResponse) - ++fetchStats.icacheStallCycles; - else if (fetchStatus[tid] == ItlbWait) - ++fetchStats.tlbCycles; - else - ++fetchStats.miscStallCycles; - return; - } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) { - // Stall CPU if an interrupt is posted and we're not issuing - // an delayed commit micro-op currently (delayed commit instructions - // are not interruptable by interrupts, only faults) - ++fetchStats.miscStallCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is stalled!\n", tid); - return; - } - } else { - if (fetchStatus[tid] == Idle) { - ++fetchStats.idleCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is idle!\n", tid); - } - - // Status is Idle, so fetch should do nothing. 
- return; - } - - ++fetchStats.cycles; - - TheISA::PCState nextPC = thisPC; - - StaticInstPtr staticInst = NULL; - StaticInstPtr curMacroop = macroop[tid]; - - // If the read of the first instruction was successful, then grab the - // instructions from the rest of the cache line and put them into the - // queue heading to decode. - - DPRINTF(Fetch, "[tid:%i] Adding instructions to queue to " - "decode.\n", tid); - - // Need to keep track of whether or not a predicted branch - // ended this fetch block. - bool predictedBranch = false; - - // Need to halt fetch if quiesce instruction detected - bool quiesce = false; - - TheISA::MachInst *cacheInsts = - reinterpret_cast(fetchBuffer[tid]); - - const unsigned numInsts = fetchBufferSize / instSize; - unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; - - // Loop through instruction memory from the cache. - // Keep issuing while fetchWidth is available and branch is not - // predicted taken - while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize - && !predictedBranch && !quiesce) { - // We need to process more memory if we aren't going to get a - // StaticInst from the rom, the current macroop, or what's already - // in the decoder. - bool needMem = !inRom && !curMacroop && - !decoder[tid]->instReady(); - fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); - - if (needMem) { - // If buffer is no longer valid or fetchAddr has moved to point - // to the next cache block then start fetch from icache. - if (!fetchBufferValid[tid] || - fetchBufferBlockPC != fetchBufferPC[tid]) - break; - - if (blkOffset >= numInsts) { - // We need to process more memory, but we've run out of the - // current block. 
- break; - } - - decoder[tid]->moreBytes(thisPC, fetchAddr, cacheInsts[blkOffset]); - - if (decoder[tid]->needMoreBytes()) { - blkOffset++; - fetchAddr += instSize; - pcOffset += instSize; - } - } - - // Extract as many instructions and/or microops as we can from - // the memory we've processed so far. - do { - if (!(curMacroop || inRom)) { - if (decoder[tid]->instReady()) { - staticInst = decoder[tid]->decode(thisPC); - - // Increment stat of fetched instructions. - ++fetchStats.insts; - - if (staticInst->isMacroop()) { - curMacroop = staticInst; - } else { - pcOffset = 0; - } - } else { - // We need more bytes for this instruction so blkOffset and - // pcOffset will be updated - break; - } - } - // Whether we're moving to a new macroop because we're at the - // end of the current one, or the branch predictor incorrectly - // thinks we are... - bool newMacro = false; - if (curMacroop || inRom) { - if (inRom) { - staticInst = decoder[tid]->fetchRomMicroop( - thisPC.microPC(), curMacroop); - } else { - staticInst = curMacroop->fetchMicroop(thisPC.microPC()); - } - newMacro |= staticInst->isLastMicroop(); - } - - O3DynInstPtr instruction = - buildInst(tid, staticInst, curMacroop, - thisPC, nextPC, true); - - ppFetch->notify(instruction); - numInst++; - -#if TRACING_ON - if (Debug::O3PipeView) { - instruction->fetchTick = curTick(); - } -#endif - - nextPC = thisPC; - - // If we're branching after this instruction, quit fetching - // from the same block. - predictedBranch |= thisPC.branching(); - predictedBranch |= - lookupAndUpdateNextPC(instruction, nextPC); - if (predictedBranch) { - DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); - } - - newMacro |= thisPC.instAddr() != nextPC.instAddr(); - - // Move to the next instruction, unless we have a branch. 
- thisPC = nextPC; - inRom = isRomMicroPC(thisPC.microPC()); - - if (newMacro) { - fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; - blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; - pcOffset = 0; - curMacroop = NULL; - } - - if (instruction->isQuiesce()) { - DPRINTF(Fetch, - "Quiesce instruction encountered, halting fetch!\n"); - fetchStatus[tid] = QuiescePending; - status_change = true; - quiesce = true; - break; - } - } while ((curMacroop || decoder[tid]->instReady()) && - numInst < fetchWidth && - fetchQueue[tid].size() < fetchQueueSize); - - // Re-evaluate whether the next instruction to fetch is in micro-op ROM - // or not. - inRom = isRomMicroPC(thisPC.microPC()); - } - - if (predictedBranch) { - DPRINTF(Fetch, "[tid:%i] Done fetching, predicted branch " - "instruction encountered.\n", tid); - } else if (numInst >= fetchWidth) { - DPRINTF(Fetch, "[tid:%i] Done fetching, reached fetch bandwidth " - "for this cycle.\n", tid); - } else if (blkOffset >= fetchBufferSize) { - DPRINTF(Fetch, "[tid:%i] Done fetching, reached the end of the" - "fetch buffer.\n", tid); - } - - macroop[tid] = curMacroop; - fetchOffset[tid] = pcOffset; - - if (numInst > 0) { - wroteToTimeBuffer = true; - } - - pc[tid] = thisPC; - - // pipeline a fetch if we're crossing a fetch buffer boundary and not in - // a state that would preclude fetching - fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); - issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && - fetchStatus[tid] != IcacheWaitResponse && - fetchStatus[tid] != ItlbWait && - fetchStatus[tid] != IcacheWaitRetry && - fetchStatus[tid] != QuiescePending && - !curMacroop; -} - -template -void -DefaultFetch::recvReqRetry() -{ - if (retryPkt != NULL) { - assert(cacheBlocked); - assert(retryTid != InvalidThreadID); - assert(fetchStatus[retryTid] == IcacheWaitRetry); - - if (icachePort.sendTimingReq(retryPkt)) { - fetchStatus[retryTid] = 
IcacheWaitResponse; - // Notify Fetch Request probe when a retryPkt is successfully sent. - // Note that notify must be called before retryPkt is set to NULL. - ppFetchRequestSent->notify(retryPkt->req); - retryPkt = NULL; - retryTid = InvalidThreadID; - cacheBlocked = false; - } - } else { - assert(retryTid == InvalidThreadID); - // Access has been squashed since it was sent out. Just clear - // the cache being blocked. - cacheBlocked = false; - } -} - -/////////////////////////////////////// -// // -// SMT FETCH POLICY MAINTAINED HERE // -// // -/////////////////////////////////////// -template -ThreadID -DefaultFetch::getFetchingThread() -{ - if (numThreads > 1) { - switch (fetchPolicy) { - case SMTFetchPolicy::RoundRobin: - return roundRobin(); - case SMTFetchPolicy::IQCount: - return iqCount(); - case SMTFetchPolicy::LSQCount: - return lsqCount(); - case SMTFetchPolicy::Branch: - return branchCount(); - default: - return InvalidThreadID; - } - } else { - std::list::iterator thread = activeThreads->begin(); - if (thread == activeThreads->end()) { - return InvalidThreadID; - } - - ThreadID tid = *thread; - - if (fetchStatus[tid] == Running || - fetchStatus[tid] == IcacheAccessComplete || - fetchStatus[tid] == Idle) { - return tid; - } else { - return InvalidThreadID; - } - } -} - - -template -ThreadID -DefaultFetch::roundRobin() -{ - std::list::iterator pri_iter = priorityList.begin(); - std::list::iterator end = priorityList.end(); - - ThreadID high_pri; - - while (pri_iter != end) { - high_pri = *pri_iter; - - assert(high_pri <= numThreads); - - if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheAccessComplete || - fetchStatus[high_pri] == Idle) { - - priorityList.erase(pri_iter); - priorityList.push_back(high_pri); - - return high_pri; - } - - pri_iter++; - } - - return InvalidThreadID; -} - -template -ThreadID -DefaultFetch::iqCount() -{ - //sorted from lowest->highest - std::priority_queue, - std::greater > PQ; - std::map threadMap; - 
- std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - unsigned iqCount = fromIEW->iewInfo[tid].iqCount; - - //we can potentially get tid collisions if two threads - //have the same iqCount, but this should be rare. - PQ.push(iqCount); - threadMap[iqCount] = tid; - } - - while (!PQ.empty()) { - ThreadID high_pri = threadMap[PQ.top()]; - - if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheAccessComplete || - fetchStatus[high_pri] == Idle) - return high_pri; - else - PQ.pop(); - - } - - return InvalidThreadID; -} - -template -ThreadID -DefaultFetch::lsqCount() -{ - //sorted from lowest->highest - std::priority_queue, - std::greater > PQ; - std::map threadMap; - - std::list::iterator threads = activeThreads->begin(); - std::list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount; - - //we can potentially get tid collisions if two threads - //have the same iqCount, but this should be rare. - PQ.push(ldstqCount); - threadMap[ldstqCount] = tid; - } - - while (!PQ.empty()) { - ThreadID high_pri = threadMap[PQ.top()]; - - if (fetchStatus[high_pri] == Running || - fetchStatus[high_pri] == IcacheAccessComplete || - fetchStatus[high_pri] == Idle) - return high_pri; - else - PQ.pop(); - } - - return InvalidThreadID; -} - -template -ThreadID -DefaultFetch::branchCount() -{ - panic("Branch Count Fetch policy unimplemented\n"); - return InvalidThreadID; -} - -template -void -DefaultFetch::pipelineIcacheAccesses(ThreadID tid) -{ - if (!issuePipelinedIfetch[tid]) { - return; - } - - // The next PC to access. 
- TheISA::PCState thisPC = pc[tid]; - - if (isRomMicroPC(thisPC.microPC())) { - return; - } - - Addr pcOffset = fetchOffset[tid]; - Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - - // Align the fetch PC so its at the start of a fetch buffer segment. - Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); - - // Unless buffer already got the block, fetch it from icache. - if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { - DPRINTF(Fetch, "[tid:%i] Issuing a pipelined I-cache access, " - "starting at PC %s.\n", tid, thisPC); - - fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); - } -} - -template -void -DefaultFetch::profileStall(ThreadID tid) { - DPRINTF(Fetch,"There are no more threads available to fetch from.\n"); - - // @todo Per-thread stats - - if (stalls[tid].drain) { - ++fetchStats.pendingDrainCycles; - DPRINTF(Fetch, "Fetch is waiting for a drain!\n"); - } else if (activeThreads->empty()) { - ++fetchStats.noActiveThreadStallCycles; - DPRINTF(Fetch, "Fetch has no active thread!\n"); - } else if (fetchStatus[tid] == Blocked) { - ++fetchStats.blockedCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is blocked!\n", tid); - } else if (fetchStatus[tid] == Squashing) { - ++fetchStats.squashCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid); - } else if (fetchStatus[tid] == IcacheWaitResponse) { - ++fetchStats.icacheStallCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n", - tid); - } else if (fetchStatus[tid] == ItlbWait) { - ++fetchStats.tlbCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is waiting ITLB walk to " - "finish!\n", tid); - } else if (fetchStatus[tid] == TrapPending) { - ++fetchStats.pendingTrapStallCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending trap!\n", - tid); - } else if (fetchStatus[tid] == QuiescePending) { - ++fetchStats.pendingQuiesceStallCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is waiting for a pending quiesce " - "instruction!\n", tid); - } else if 
(fetchStatus[tid] == IcacheWaitRetry) { - ++fetchStats.icacheWaitRetryStallCycles; - DPRINTF(Fetch, "[tid:%i] Fetch is waiting for an I-cache retry!\n", - tid); - } else if (fetchStatus[tid] == NoGoodAddr) { - DPRINTF(Fetch, "[tid:%i] Fetch predicted non-executable address\n", - tid); - } else { - DPRINTF(Fetch, "[tid:%i] Unexpected fetch stall reason " - "(Status: %i)\n", - tid, fetchStatus[tid]); - } -} - -template -bool -DefaultFetch::IcachePort::recvTimingResp(PacketPtr pkt) -{ - DPRINTF(O3CPU, "Fetch unit received timing\n"); - // We shouldn't ever get a cacheable block in Modified state - assert(pkt->req->isUncacheable() || - !(pkt->cacheResponding() && !pkt->hasSharers())); - fetch->processCacheCompletion(pkt); - - return true; -} - -template -void -DefaultFetch::IcachePort::recvReqRetry() -{ - fetch->recvReqRetry(); -} - -#endif//__CPU_O3_FETCH_IMPL_HH__