From 9adca305285e10f19e656c0a22a5b2a7e2f8e19c Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 3 Mar 2021 02:56:15 -0800 Subject: [PATCH] cpu: De-templatize the O3 InstructionQueue. Change-Id: Id897b66b4041a6be4c85019585b205e8d8b366e5 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42108 Tested-by: kokoro Reviewed-by: Gabe Black Maintainer: Gabe Black --- src/cpu/o3/iew.hh | 2 +- src/cpu/o3/inst_queue.cc | 1543 ++++++++++++++++++++++++++++++- src/cpu/o3/inst_queue.hh | 16 +- src/cpu/o3/inst_queue_impl.hh | 1623 --------------------------------- src/cpu/o3/lsq_unit.cc | 11 + src/cpu/o3/lsq_unit.hh | 13 +- src/cpu/o3/mem_dep_unit.cc | 2 +- src/cpu/o3/mem_dep_unit.hh | 5 +- 8 files changed, 1566 insertions(+), 1649 deletions(-) delete mode 100644 src/cpu/o3/inst_queue_impl.hh diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 24999b5b30..7bcfec63ef 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -351,7 +351,7 @@ class DefaultIEW public: /** Instruction queue. */ - InstructionQueue instQueue; + InstructionQueue instQueue; /** Load / store queue. */ LSQ ldstQueue; diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 0b67ed1111..90c571f6b2 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -1,5 +1,18 @@ /* - * Copyright (c) 2004-2005 The Regents of The University of Michigan + * Copyright (c) 2011-2014, 2017-2020 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,8 +39,1528 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "cpu/o3/inst_queue_impl.hh" -#include "cpu/o3/isa_specific.hh" +#include "cpu/o3/inst_queue.hh" -// Force instantiation of InstructionQueue. -template class InstructionQueue; +#include +#include + +#include "base/logging.hh" +#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/fu_pool.hh" +#include "cpu/o3/limits.hh" +#include "debug/IQ.hh" +#include "enums/OpClass.hh" +#include "params/DerivO3CPU.hh" +#include "sim/core.hh" + +// clang complains about std::set being overloaded with Packet::set if +// we open up the entire namespace std +using std::list; + +InstructionQueue::FUCompletion::FUCompletion(const O3DynInstPtr &_inst, + int fu_idx, InstructionQueue *iq_ptr) + : Event(Stat_Event_Pri, AutoDelete), + inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) +{ +} + +void +InstructionQueue::FUCompletion::process() +{ + iqPtr->processFUCompletion(inst, freeFU ? 
fuIdx : -1); + inst = NULL; +} + + +const char * +InstructionQueue::FUCompletion::description() const +{ + return "Functional unit completion"; +} + +InstructionQueue::InstructionQueue(FullO3CPU *cpu_ptr, + DefaultIEW *iew_ptr, const DerivO3CPUParams &params) + : cpu(cpu_ptr), + iewStage(iew_ptr), + fuPool(params.fuPool), + iqPolicy(params.smtIQPolicy), + numThreads(params.numThreads), + numEntries(params.numIQEntries), + totalWidth(params.issueWidth), + commitToIEWDelay(params.commitToIEWDelay), + iqStats(cpu, totalWidth), + iqIOStats(cpu) +{ + assert(fuPool); + + // Set the number of total physical registers + // As the vector registers have two addressing modes, they are added twice + numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs + + params.numPhysVecRegs + + params.numPhysVecRegs * TheISA::NumVecElemPerVecReg + + params.numPhysVecPredRegs + + params.numPhysCCRegs; + + //Create an entry for each physical register within the + //dependency graph. + dependGraph.resize(numPhysRegs); + + // Resize the register scoreboard. + regScoreboard.resize(numPhysRegs); + + //Initialize Mem Dependence Units + for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { + memDepUnit[tid].init(params, tid, cpu_ptr); + memDepUnit[tid].setIQ(this); + } + + resetState(); + + //Figure out resource sharing policy + if (iqPolicy == SMTQueuePolicy::Dynamic) { + //Set Max Entries to Total IQ Capacity + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = numEntries; + } + + } else if (iqPolicy == SMTQueuePolicy::Partitioned) { + //@todo:make work if part_amt doesn't divide evenly. 
+ int part_amt = numEntries / numThreads; + + //Divide IQ up evenly + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = part_amt; + } + + DPRINTF(IQ, "IQ sharing policy set to Partitioned:" + "%i entries per thread.\n",part_amt); + } else if (iqPolicy == SMTQueuePolicy::Threshold) { + double threshold = (double)params.smtIQThreshold / 100; + + int thresholdIQ = (int)((double)threshold * numEntries); + + //Divide up by threshold amount + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = thresholdIQ; + } + + DPRINTF(IQ, "IQ sharing policy set to Threshold:" + "%i entries per thread.\n",thresholdIQ); + } + for (ThreadID tid = numThreads; tid < O3MaxThreads; tid++) { + maxEntries[tid] = 0; + } +} + +InstructionQueue::~InstructionQueue() +{ + dependGraph.reset(); +#ifdef DEBUG + cprintf("Nodes traversed: %i, removed: %i\n", + dependGraph.nodesTraversed, dependGraph.nodesRemoved); +#endif +} + +std::string +InstructionQueue::name() const +{ + return cpu->name() + ".iq"; +} + +InstructionQueue::IQStats::IQStats(FullO3CPU *cpu, + const unsigned &total_width) + : Stats::Group(cpu), + ADD_STAT(instsAdded, Stats::Units::Count::get(), + "Number of instructions added to the IQ (excludes non-spec)"), + ADD_STAT(nonSpecInstsAdded, Stats::Units::Count::get(), + "Number of non-speculative instructions added to the IQ"), + ADD_STAT(instsIssued, Stats::Units::Count::get(), + "Number of instructions issued"), + ADD_STAT(intInstsIssued, Stats::Units::Count::get(), + "Number of integer instructions issued"), + ADD_STAT(floatInstsIssued, Stats::Units::Count::get(), + "Number of float instructions issued"), + ADD_STAT(branchInstsIssued, Stats::Units::Count::get(), + "Number of branch instructions issued"), + ADD_STAT(memInstsIssued, Stats::Units::Count::get(), + "Number of memory instructions issued"), + ADD_STAT(miscInstsIssued, Stats::Units::Count::get(), + "Number of miscellaneous instructions issued"), + ADD_STAT(squashedInstsIssued, 
Stats::Units::Count::get(), + "Number of squashed instructions issued"), + ADD_STAT(squashedInstsExamined, Stats::Units::Count::get(), + "Number of squashed instructions iterated over during squash; " + "mainly for profiling"), + ADD_STAT(squashedOperandsExamined, Stats::Units::Count::get(), + "Number of squashed operands that are examined and possibly " + "removed from graph"), + ADD_STAT(squashedNonSpecRemoved, Stats::Units::Count::get(), + "Number of squashed non-spec instructions that were removed"), + ADD_STAT(numIssuedDist, Stats::Units::Count::get(), + "Number of insts issued each cycle"), + ADD_STAT(statFuBusy, Stats::Units::Count::get(), + "attempts to use FU when none available"), + ADD_STAT(statIssuedInstType, Stats::Units::Count::get(), + "Number of instructions issued per FU type, per thread"), + ADD_STAT(issueRate, Stats::Units::Rate< + Stats::Units::Count, Stats::Units::Cycle>::get(), + "Inst issue rate", instsIssued / cpu->baseStats.numCycles), + ADD_STAT(fuBusy, Stats::Units::Count::get(), "FU busy when requested"), + ADD_STAT(fuBusyRate, Stats::Units::Rate< + Stats::Units::Count, Stats::Units::Count>::get(), + "FU busy rate (busy events/executed inst)") +{ + instsAdded + .prereq(instsAdded); + + nonSpecInstsAdded + .prereq(nonSpecInstsAdded); + + instsIssued + .prereq(instsIssued); + + intInstsIssued + .prereq(intInstsIssued); + + floatInstsIssued + .prereq(floatInstsIssued); + + branchInstsIssued + .prereq(branchInstsIssued); + + memInstsIssued + .prereq(memInstsIssued); + + miscInstsIssued + .prereq(miscInstsIssued); + + squashedInstsIssued + .prereq(squashedInstsIssued); + + squashedInstsExamined + .prereq(squashedInstsExamined); + + squashedOperandsExamined + .prereq(squashedOperandsExamined); + + squashedNonSpecRemoved + .prereq(squashedNonSpecRemoved); +/* + queueResDist + .init(Num_OpClasses, 0, 99, 2) + .name(name() + ".IQ:residence:") + .desc("cycles from dispatch to issue") + .flags(total | pdf | cdf ) + ; + for (int i = 0; i < 
Num_OpClasses; ++i) { + queueResDist.subname(i, opClassStrings[i]); + } +*/ + numIssuedDist + .init(0,total_width,1) + .flags(Stats::pdf) + ; +/* + dist_unissued + .init(Num_OpClasses+2) + .name(name() + ".unissued_cause") + .desc("Reason ready instruction not issued") + .flags(pdf | dist) + ; + for (int i=0; i < (Num_OpClasses + 2); ++i) { + dist_unissued.subname(i, unissued_names[i]); + } +*/ + statIssuedInstType + .init(cpu->numThreads,Enums::Num_OpClass) + .flags(Stats::total | Stats::pdf | Stats::dist) + ; + statIssuedInstType.ysubnames(Enums::OpClassStrings); + + // + // How long did instructions for a particular FU type wait prior to issue + // +/* + issueDelayDist + .init(Num_OpClasses,0,99,2) + .name(name() + ".") + .desc("cycles from operands ready to issue") + .flags(pdf | cdf) + ; + for (int i=0; inumThreads) + .flags(Stats::total) + ; + + fuBusyRate + .flags(Stats::total) + ; + fuBusyRate = fuBusy / instsIssued; +} + +InstructionQueue::IQIOStats::IQIOStats(Stats::Group *parent) + : Stats::Group(parent), + ADD_STAT(intInstQueueReads, Stats::Units::Count::get(), + "Number of integer instruction queue reads"), + ADD_STAT(intInstQueueWrites, Stats::Units::Count::get(), + "Number of integer instruction queue writes"), + ADD_STAT(intInstQueueWakeupAccesses, Stats::Units::Count::get(), + "Number of integer instruction queue wakeup accesses"), + ADD_STAT(fpInstQueueReads, Stats::Units::Count::get(), + "Number of floating instruction queue reads"), + ADD_STAT(fpInstQueueWrites, Stats::Units::Count::get(), + "Number of floating instruction queue writes"), + ADD_STAT(fpInstQueueWakeupAccesses, Stats::Units::Count::get(), + "Number of floating instruction queue wakeup accesses"), + ADD_STAT(vecInstQueueReads, Stats::Units::Count::get(), + "Number of vector instruction queue reads"), + ADD_STAT(vecInstQueueWrites, Stats::Units::Count::get(), + "Number of vector instruction queue writes"), + ADD_STAT(vecInstQueueWakeupAccesses, Stats::Units::Count::get(), + "Number 
of vector instruction queue wakeup accesses"), + ADD_STAT(intAluAccesses, Stats::Units::Count::get(), + "Number of integer alu accesses"), + ADD_STAT(fpAluAccesses, Stats::Units::Count::get(), + "Number of floating point alu accesses"), + ADD_STAT(vecAluAccesses, Stats::Units::Count::get(), + "Number of vector alu accesses") +{ + using namespace Stats; + intInstQueueReads + .flags(total); + + intInstQueueWrites + .flags(total); + + intInstQueueWakeupAccesses + .flags(total); + + fpInstQueueReads + .flags(total); + + fpInstQueueWrites + .flags(total); + + fpInstQueueWakeupAccesses + .flags(total); + + vecInstQueueReads + .flags(total); + + vecInstQueueWrites + .flags(total); + + vecInstQueueWakeupAccesses + .flags(total); + + intAluAccesses + .flags(total); + + fpAluAccesses + .flags(total); + + vecAluAccesses + .flags(total); +} + +void +InstructionQueue::resetState() +{ + //Initialize thread IQ counts + for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { + count[tid] = 0; + instList[tid].clear(); + } + + // Initialize the number of free IQ entries. + freeEntries = numEntries; + + // Note that in actuality, the registers corresponding to the logical + // registers start off as ready. However this doesn't matter for the + // IQ as the instruction should have been correctly told if those + // registers are ready in rename. Thus it can all be initialized as + // unready. 
+ for (int i = 0; i < numPhysRegs; ++i) { + regScoreboard[i] = false; + } + + for (ThreadID tid = 0; tid < O3MaxThreads; ++tid) { + squashedSeqNum[tid] = 0; + } + + for (int i = 0; i < Num_OpClasses; ++i) { + while (!readyInsts[i].empty()) + readyInsts[i].pop(); + queueOnList[i] = false; + readyIt[i] = listOrder.end(); + } + nonSpecInsts.clear(); + listOrder.clear(); + deferredMemInsts.clear(); + blockedMemInsts.clear(); + retryMemInsts.clear(); + wbOutstanding = 0; +} + +void +InstructionQueue::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; +} + +void +InstructionQueue::setIssueToExecuteQueue( + TimeBuffer *i2e_ptr) +{ + issueToExecuteQueue = i2e_ptr; +} + +void +InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToIEWDelay); +} + +bool +InstructionQueue::isDrained() const +{ + bool drained = dependGraph.empty() && + instsToExecute.empty() && + wbOutstanding == 0; + for (ThreadID tid = 0; tid < numThreads; ++tid) + drained = drained && memDepUnit[tid].isDrained(); + + return drained; +} + +void +InstructionQueue::drainSanityCheck() const +{ + assert(dependGraph.empty()); + assert(instsToExecute.empty()); + for (ThreadID tid = 0; tid < numThreads; ++tid) + memDepUnit[tid].drainSanityCheck(); +} + +void +InstructionQueue::takeOverFrom() +{ + resetState(); +} + +int +InstructionQueue::entryAmount(ThreadID num_threads) +{ + if (iqPolicy == SMTQueuePolicy::Partitioned) { + return numEntries / num_threads; + } else { + return 0; + } +} + + +void +InstructionQueue::resetEntries() +{ + if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) { + int active_threads = activeThreads->size(); + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (iqPolicy == SMTQueuePolicy::Partitioned) { + maxEntries[tid] = numEntries / active_threads; + } else if (iqPolicy == 
SMTQueuePolicy::Threshold && + active_threads == 1) { + maxEntries[tid] = numEntries; + } + } + } +} + +unsigned +InstructionQueue::numFreeEntries() +{ + return freeEntries; +} + +unsigned +InstructionQueue::numFreeEntries(ThreadID tid) +{ + return maxEntries[tid] - count[tid]; +} + +// Might want to do something more complex if it knows how many instructions +// will be issued this cycle. +bool +InstructionQueue::isFull() +{ + if (freeEntries == 0) { + return(true); + } else { + return(false); + } +} + +bool +InstructionQueue::isFull(ThreadID tid) +{ + if (numFreeEntries(tid) == 0) { + return(true); + } else { + return(false); + } +} + +bool +InstructionQueue::hasReadyInsts() +{ + if (!listOrder.empty()) { + return true; + } + + for (int i = 0; i < Num_OpClasses; ++i) { + if (!readyInsts[i].empty()) { + return true; + } + } + + return false; +} + +void +InstructionQueue::insert(const O3DynInstPtr &new_inst) +{ + if (new_inst->isFloating()) { + iqIOStats.fpInstQueueWrites++; + } else if (new_inst->isVector()) { + iqIOStats.vecInstQueueWrites++; + } else { + iqIOStats.intInstQueueWrites++; + } + // Make sure the instruction is valid + assert(new_inst); + + DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n", + new_inst->seqNum, new_inst->pcState()); + + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + --freeEntries; + + new_inst->setInIQ(); + + // Look through its source registers (physical regs), and mark any + // dependencies. + addToDependents(new_inst); + + // Have this instruction set itself as the producer of its destination + // register(s). 
+ addToProducers(new_inst); + + if (new_inst->isMemRef()) { + memDepUnit[new_inst->threadNumber].insert(new_inst); + } else { + addIfReady(new_inst); + } + + ++iqStats.instsAdded; + + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} + +void +InstructionQueue::insertNonSpec(const O3DynInstPtr &new_inst) +{ + // @todo: Clean up this code; can do it by setting inst as unable + // to issue, then calling normal insert on the inst. + if (new_inst->isFloating()) { + iqIOStats.fpInstQueueWrites++; + } else if (new_inst->isVector()) { + iqIOStats.vecInstQueueWrites++; + } else { + iqIOStats.intInstQueueWrites++; + } + + assert(new_inst); + + nonSpecInsts[new_inst->seqNum] = new_inst; + + DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s " + "to the IQ.\n", + new_inst->seqNum, new_inst->pcState()); + + assert(freeEntries != 0); + + instList[new_inst->threadNumber].push_back(new_inst); + + --freeEntries; + + new_inst->setInIQ(); + + // Have this instruction set itself as the producer of its destination + // register(s). + addToProducers(new_inst); + + // If it's a memory instruction, add it to the memory dependency + // unit. 
+ if (new_inst->isMemRef()) { + memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); + } + + ++iqStats.nonSpecInstsAdded; + + count[new_inst->threadNumber]++; + + assert(freeEntries == (numEntries - countInsts())); +} + +void +InstructionQueue::insertBarrier(const O3DynInstPtr &barr_inst) +{ + memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst); + + insertNonSpec(barr_inst); +} + +O3DynInstPtr +InstructionQueue::getInstToExecute() +{ + assert(!instsToExecute.empty()); + O3DynInstPtr inst = std::move(instsToExecute.front()); + instsToExecute.pop_front(); + if (inst->isFloating()) { + iqIOStats.fpInstQueueReads++; + } else if (inst->isVector()) { + iqIOStats.vecInstQueueReads++; + } else { + iqIOStats.intInstQueueReads++; + } + return inst; +} + +void +InstructionQueue::addToOrderList(OpClass op_class) +{ + assert(!readyInsts[op_class].empty()); + + ListOrderEntry queue_entry; + + queue_entry.queueType = op_class; + + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + ListOrderIt list_it = listOrder.begin(); + ListOrderIt list_end_it = listOrder.end(); + + while (list_it != list_end_it) { + if ((*list_it).oldestInst > queue_entry.oldestInst) { + break; + } + + list_it++; + } + + readyIt[op_class] = listOrder.insert(list_it, queue_entry); + queueOnList[op_class] = true; +} + +void +InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it) +{ + // Get iterator of next item on the list + // Delete the original iterator + // Determine if the next item is either the end of the list or younger + // than the new instruction. If so, then add in a new iterator right here. + // If not, then move along. 
+ ListOrderEntry queue_entry; + OpClass op_class = (*list_order_it).queueType; + ListOrderIt next_it = list_order_it; + + ++next_it; + + queue_entry.queueType = op_class; + queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; + + while (next_it != listOrder.end() && + (*next_it).oldestInst < queue_entry.oldestInst) { + ++next_it; + } + + readyIt[op_class] = listOrder.insert(next_it, queue_entry); +} + +void +InstructionQueue::processFUCompletion(const O3DynInstPtr &inst, int fu_idx) +{ + DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum); + assert(!cpu->switchedOut()); + // The CPU could have been sleeping until this op completed (*extremely* + // long latency op). Wake it if it was. This may be overkill. + --wbOutstanding; + iewStage->wakeCPU(); + + if (fu_idx > -1) + fuPool->freeUnitNextCycle(fu_idx); + + // @todo: Ensure that these FU Completions happen at the beginning + // of a cycle, otherwise they could add too many instructions to + // the queue. + issueToExecuteQueue->access(-1)->size++; + instsToExecute.push_back(inst); +} + +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +void +InstructionQueue::scheduleReadyInsts() +{ + DPRINTF(IQ, "Attempting to schedule ready instructions from " + "the IQ.\n"); + + O3Comm::IssueStruct *i2e_info = issueToExecuteQueue->access(0); + + O3DynInstPtr mem_inst; + while ((mem_inst = getDeferredMemInstToExecute())) { + addReadyMemInst(mem_inst); + } + + // See if any cache blocked instructions are able to be executed + while ((mem_inst = getBlockedMemInstToExecute())) { + addReadyMemInst(mem_inst); + } + + // Have iterator to head of the list + // While I haven't exceeded bandwidth or reached the end of the list, + // Try to get a FU that can do what this op needs. 
+ // If successful, change the oldestInst to the new top of the list, put + // the queue in the proper place in the list. + // Increment the iterator. + // This will avoid trying to schedule a certain op class if there are no + // FUs that handle it. + int total_issued = 0; + ListOrderIt order_it = listOrder.begin(); + ListOrderIt order_end_it = listOrder.end(); + + while (total_issued < totalWidth && order_it != order_end_it) { + OpClass op_class = (*order_it).queueType; + + assert(!readyInsts[op_class].empty()); + + O3DynInstPtr issuing_inst = readyInsts[op_class].top(); + + if (issuing_inst->isFloating()) { + iqIOStats.fpInstQueueReads++; + } else if (issuing_inst->isVector()) { + iqIOStats.vecInstQueueReads++; + } else { + iqIOStats.intInstQueueReads++; + } + + assert(issuing_inst->seqNum == (*order_it).oldestInst); + + if (issuing_inst->isSquashed()) { + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + listOrder.erase(order_it++); + + ++iqStats.squashedInstsIssued; + + continue; + } + + int idx = FUPool::NoCapableFU; + Cycles op_latency = Cycles(1); + ThreadID tid = issuing_inst->threadNumber; + + if (op_class != No_OpClass) { + idx = fuPool->getUnit(op_class); + if (issuing_inst->isFloating()) { + iqIOStats.fpAluAccesses++; + } else if (issuing_inst->isVector()) { + iqIOStats.vecAluAccesses++; + } else { + iqIOStats.intAluAccesses++; + } + if (idx > FUPool::NoFreeFU) { + op_latency = fuPool->getOpLatency(op_class); + } + } + + // If we have an instruction that doesn't require a FU, or a + // valid FU, then schedule for execution. + if (idx != FUPool::NoFreeFU) { + if (op_latency == Cycles(1)) { + i2e_info->size++; + instsToExecute.push_back(issuing_inst); + + // Add the FU onto the list of FU's to be freed next + // cycle if we used one. 
+ if (idx >= 0) + fuPool->freeUnitNextCycle(idx); + } else { + bool pipelined = fuPool->isPipelined(op_class); + // Generate completion event for the FU + ++wbOutstanding; + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); + + cpu->schedule(execution, + cpu->clockEdge(Cycles(op_latency - 1))); + + if (!pipelined) { + // If FU isn't pipelined, then it must be freed + // upon the execution completing. + execution->setFreeFU(); + } else { + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnitNextCycle(idx); + } + } + + DPRINTF(IQ, "Thread %i: Issuing instruction PC %s " + "[sn:%llu]\n", + tid, issuing_inst->pcState(), + issuing_inst->seqNum); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + issuing_inst->setIssued(); + ++total_issued; + +#if TRACING_ON + issuing_inst->issueTick = curTick() - issuing_inst->fetchTick; +#endif + + if (!issuing_inst->isMemRef()) { + // Memory instructions can not be freed from the IQ until they + // complete. + ++freeEntries; + count[tid]--; + issuing_inst->clearInIQ(); + } else { + memDepUnit[tid].issue(issuing_inst); + } + + listOrder.erase(order_it++); + iqStats.statIssuedInstType[tid][op_class]++; + } else { + iqStats.statFuBusy[op_class]++; + iqStats.fuBusy[tid]++; + ++order_it; + } + } + + iqStats.numIssuedDist.sample(total_issued); + iqStats.instsIssued+= total_issued; + + // If we issued any instructions, tell the CPU we had activity. + // @todo If the way deferred memory instructions are handled due to + // translation changes then the deferredMemInsts condition should be + // removed from the code below. 
+ if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) { + cpu->activityThisCycle(); + } else { + DPRINTF(IQ, "Not able to schedule any instructions.\n"); + } +} + +void +InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) +{ + DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready " + "to execute.\n", inst); + + NonSpecMapIt inst_it = nonSpecInsts.find(inst); + + assert(inst_it != nonSpecInsts.end()); + + ThreadID tid = (*inst_it).second->threadNumber; + + (*inst_it).second->setAtCommit(); + + (*inst_it).second->setCanIssue(); + + if (!(*inst_it).second->isMemRef()) { + addIfReady((*inst_it).second); + } else { + memDepUnit[tid].nonSpecInstReady((*inst_it).second); + } + + (*inst_it).second = NULL; + + nonSpecInsts.erase(inst_it); +} + +void +InstructionQueue::commit(const InstSeqNum &inst, ThreadID tid) +{ + DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n", + tid,inst); + + ListIt iq_it = instList[tid].begin(); + + while (iq_it != instList[tid].end() && + (*iq_it)->seqNum <= inst) { + ++iq_it; + instList[tid].pop_front(); + } + + assert(freeEntries == (numEntries - countInsts())); +} + +int +InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst) +{ + int dependents = 0; + + // The instruction queue here takes care of both floating and int ops + if (completed_inst->isFloating()) { + iqIOStats.fpInstQueueWakeupAccesses++; + } else if (completed_inst->isVector()) { + iqIOStats.vecInstQueueWakeupAccesses++; + } else { + iqIOStats.intInstQueueWakeupAccesses++; + } + + DPRINTF(IQ, "Waking dependents of completed instruction.\n"); + + assert(!completed_inst->isSquashed()); + + // Tell the memory dependence unit to wake any dependents on this + // instruction if it is a memory instruction. Also complete the memory + // instruction at this point since we know it executed without issues. 
+ ThreadID tid = completed_inst->threadNumber; + if (completed_inst->isMemRef()) { + memDepUnit[tid].completeInst(completed_inst); + + DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n", + completed_inst->pcState(), completed_inst->seqNum); + + ++freeEntries; + completed_inst->memOpDone(true); + count[tid]--; + } else if (completed_inst->isReadBarrier() || + completed_inst->isWriteBarrier()) { + // Completes a non mem ref barrier + memDepUnit[tid].completeInst(completed_inst); + } + + for (int dest_reg_idx = 0; + dest_reg_idx < completed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIdPtr dest_reg = + completed_inst->regs.renamedDestIdx(dest_reg_idx); + + // Special case of uniq or control registers. They are not + // handled by the IQ and thus have no dependency graph entry. + if (dest_reg->isFixedMapping()) { + DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n", + dest_reg->index(), dest_reg->className()); + continue; + } + + // Avoid waking up dependents if the register is pinned + dest_reg->decrNumPinnedWritesToComplete(); + if (dest_reg->isPinned()) + completed_inst->setPinnedRegsWritten(); + + if (dest_reg->getNumPinnedWritesToComplete() != 0) { + DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n", + dest_reg->index(), dest_reg->className()); + continue; + } + + DPRINTF(IQ, "Waking any dependents on register %i (%s).\n", + dest_reg->index(), + dest_reg->className()); + + //Go through the dependency chain, marking the registers as + //ready within the waiting instructions. + O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex()); + + while (dep_inst) { + DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] " + "PC %s.\n", dep_inst->seqNum, dep_inst->pcState()); + + // Might want to give more information to the instruction + // so that it knows which of its source registers is + // ready. However that would mean that the dependency + // graph entries would need to hold the src_reg_idx. 
+ dep_inst->markSrcRegReady(); + + addIfReady(dep_inst); + + dep_inst = dependGraph.pop(dest_reg->flatIndex()); + + ++dependents; + } + + // Reset the head node now that all of its dependents have + // been woken up. + assert(dependGraph.empty(dest_reg->flatIndex())); + dependGraph.clearInst(dest_reg->flatIndex()); + + // Mark the scoreboard as having that register ready. + regScoreboard[dest_reg->flatIndex()] = true; + } + return dependents; +} + +void +InstructionQueue::addReadyMemInst(const O3DynInstPtr &ready_inst) +{ + OpClass op_class = ready_inst->opClass(); + + readyInsts[op_class].push(ready_inst); + + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %s opclass:%i [sn:%llu].\n", + ready_inst->pcState(), op_class, ready_inst->seqNum); +} + +void +InstructionQueue::rescheduleMemInst(const O3DynInstPtr &resched_inst) +{ + DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum); + + // Reset DTB translation state + resched_inst->translationStarted(false); + resched_inst->translationCompleted(false); + + resched_inst->clearCanIssue(); + memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); +} + +void +InstructionQueue::replayMemInst(const O3DynInstPtr &replay_inst) +{ + memDepUnit[replay_inst->threadNumber].replay(); +} + +void +InstructionQueue::deferMemInst(const O3DynInstPtr &deferred_inst) +{ + deferredMemInsts.push_back(deferred_inst); +} + +void +InstructionQueue::blockMemInst(const O3DynInstPtr &blocked_inst) +{ + blocked_inst->clearIssued(); + blocked_inst->clearCanIssue(); + blockedMemInsts.push_back(blocked_inst); +} + +void 
+InstructionQueue::cacheUnblocked() +{ + retryMemInsts.splice(retryMemInsts.end(), blockedMemInsts); + // Get the CPU ticking again + cpu->wakeCPU(); +} + +O3DynInstPtr +InstructionQueue::getDeferredMemInstToExecute() +{ + for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); + ++it) { + if ((*it)->translationCompleted() || (*it)->isSquashed()) { + O3DynInstPtr mem_inst = std::move(*it); + deferredMemInsts.erase(it); + return mem_inst; + } + } + return nullptr; +} + +O3DynInstPtr +InstructionQueue::getBlockedMemInstToExecute() +{ + if (retryMemInsts.empty()) { + return nullptr; + } else { + O3DynInstPtr mem_inst = std::move(retryMemInsts.front()); + retryMemInsts.pop_front(); + return mem_inst; + } +} + +void +InstructionQueue::violation(const O3DynInstPtr &store, + const O3DynInstPtr &faulting_load) +{ + iqIOStats.intInstQueueWrites++; + memDepUnit[store->threadNumber].violation(store, faulting_load); +} + +void +InstructionQueue::squash(ThreadID tid) +{ + DPRINTF(IQ, "[tid:%i] Starting to squash instructions in " + "the IQ.\n", tid); + + // Read instruction sequence number of last instruction out of the + // time buffer. + squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; + + doSquash(tid); + + // Also tell the memory dependence unit to squash. + memDepUnit[tid].squash(squashedSeqNum[tid], tid); +} + +void +InstructionQueue::doSquash(ThreadID tid) +{ + // Start at the tail. + ListIt squash_it = instList[tid].end(); + --squash_it; + + DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n", + tid, squashedSeqNum[tid]); + + // Squash any instructions younger than the squashed sequence number + // given. 
+ while (squash_it != instList[tid].end() && + (*squash_it)->seqNum > squashedSeqNum[tid]) { + + O3DynInstPtr squashed_inst = (*squash_it); + if (squashed_inst->isFloating()) { + iqIOStats.fpInstQueueWrites++; + } else if (squashed_inst->isVector()) { + iqIOStats.vecInstQueueWrites++; + } else { + iqIOStats.intInstQueueWrites++; + } + + // Only handle the instruction if it actually is in the IQ and + // hasn't already been squashed in the IQ. + if (squashed_inst->threadNumber != tid || + squashed_inst->isSquashedInIQ()) { + --squash_it; + continue; + } + + if (!squashed_inst->isIssued() || + (squashed_inst->isMemRef() && + !squashed_inst->memOpDone())) { + + DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n", + tid, squashed_inst->seqNum, squashed_inst->pcState()); + + bool is_acq_rel = squashed_inst->isFullMemBarrier() && + (squashed_inst->isLoad() || + (squashed_inst->isStore() && + !squashed_inst->isStoreConditional())); + + // Remove the instruction from the dependency list. + if (is_acq_rel || + (!squashed_inst->isNonSpeculative() && + !squashed_inst->isStoreConditional() && + !squashed_inst->isAtomic() && + !squashed_inst->isReadBarrier() && + !squashed_inst->isWriteBarrier())) { + + for (int src_reg_idx = 0; + src_reg_idx < squashed_inst->numSrcRegs(); + src_reg_idx++) + { + PhysRegIdPtr src_reg = + squashed_inst->regs.renamedSrcIdx(src_reg_idx); + + // Only remove it from the dependency graph if it + // was placed there in the first place. + + // Instead of doing a linked list traversal, we + // can just remove these squashed instructions + // either at issue time, or when the register is + // overwritten. The only downside to this is it + // leaves more room for error. 
+ + if (!squashed_inst->regs.readySrcIdx(src_reg_idx) && + !src_reg->isFixedMapping()) { + dependGraph.remove(src_reg->flatIndex(), + squashed_inst); + } + + ++iqStats.squashedOperandsExamined; + } + + } else if (!squashed_inst->isStoreConditional() || + !squashed_inst->isCompleted()) { + NonSpecMapIt ns_inst_it = + nonSpecInsts.find(squashed_inst->seqNum); + + // we remove non-speculative instructions from + // nonSpecInsts already when they are ready, and so we + // cannot always expect to find them + if (ns_inst_it == nonSpecInsts.end()) { + // loads that became ready but stalled on a + // blocked cache are already removed from + // nonSpecInsts, and have not faulted + assert(squashed_inst->getFault() != NoFault || + squashed_inst->isMemRef()); + } else { + + (*ns_inst_it).second = NULL; + + nonSpecInsts.erase(ns_inst_it); + + ++iqStats.squashedNonSpecRemoved; + } + } + + // Might want to also clear out the head of the dependency graph. + + // Mark it as squashed within the IQ. + squashed_inst->setSquashedInIQ(); + + // @todo: Remove this hack where several statuses are set so the + // inst will flow through the rest of the pipeline. + squashed_inst->setIssued(); + squashed_inst->setCanCommit(); + squashed_inst->clearInIQ(); + + //Update Thread IQ Count + count[squashed_inst->threadNumber]--; + + ++freeEntries; + } + + // IQ clears out the heads of the dependency graph only when + // instructions reach writeback stage. If an instruction is squashed + // before writeback stage, its head of dependency graph would not be + // cleared out; it holds the instruction's O3DynInstPtr. This prevents + // freeing the squashed instruction's DynInst. + // Thus, we need to manually clear out the squashed instructions' heads + // of dependency graph. 
+ for (int dest_reg_idx = 0; + dest_reg_idx < squashed_inst->numDestRegs(); + dest_reg_idx++) + { + PhysRegIdPtr dest_reg = + squashed_inst->regs.renamedDestIdx(dest_reg_idx); + if (dest_reg->isFixedMapping()){ + continue; + } + assert(dependGraph.empty(dest_reg->flatIndex())); + dependGraph.clearInst(dest_reg->flatIndex()); + } + instList[tid].erase(squash_it--); + ++iqStats.squashedInstsExamined; + } +} + +bool +InstructionQueue::PqCompare::operator()( + const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const +{ + return lhs->seqNum > rhs->seqNum; +} + +bool +InstructionQueue::addToDependents(const O3DynInstPtr &new_inst) +{ + // Loop through the instruction's source registers, adding + // them to the dependency list if they are not ready. + int8_t total_src_regs = new_inst->numSrcRegs(); + bool return_val = false; + + for (int src_reg_idx = 0; + src_reg_idx < total_src_regs; + src_reg_idx++) + { + // Only add it to the dependency graph if it's not ready. + if (!new_inst->regs.readySrcIdx(src_reg_idx)) { + PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx); + + // Check the IQ's scoreboard to make sure the register + // hasn't become ready while the instruction was in flight + // between stages. Only if it really isn't ready should + // it be added to the dependency graph. + if (src_reg->isFixedMapping()) { + continue; + } else if (!regScoreboard[src_reg->flatIndex()]) { + DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that " + "is being added to the dependency chain.\n", + new_inst->pcState(), src_reg->index(), + src_reg->className()); + + dependGraph.insert(src_reg->flatIndex(), new_inst); + + // Change the return value to indicate that something + // was added to the dependency graph. + return_val = true; + } else { + DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that " + "became ready before it reached the IQ.\n", + new_inst->pcState(), src_reg->index(), + src_reg->className()); + // Mark a register ready within the instruction. 
+ new_inst->markSrcRegReady(src_reg_idx); + } + } + } + + return return_val; +} + +void +InstructionQueue::addToProducers(const O3DynInstPtr &new_inst) +{ + // Nothing really needs to be marked when an instruction becomes + // the producer of a register's value, but for convenience a ptr + // to the producing instruction will be placed in the head node of + // the dependency links. + int8_t total_dest_regs = new_inst->numDestRegs(); + + for (int dest_reg_idx = 0; + dest_reg_idx < total_dest_regs; + dest_reg_idx++) + { + PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx); + + // Some registers have fixed mapping, and there is no need to track + // dependencies as these instructions must be executed at commit. + if (dest_reg->isFixedMapping()) { + continue; + } + + if (!dependGraph.empty(dest_reg->flatIndex())) { + dependGraph.dump(); + panic("Dependency graph %i (%s) (flat: %i) not empty!", + dest_reg->index(), dest_reg->className(), + dest_reg->flatIndex()); + } + + dependGraph.setInst(dest_reg->flatIndex(), new_inst); + + // Mark the scoreboard to say it's not yet ready. + regScoreboard[dest_reg->flatIndex()] = false; + } +} + +void +InstructionQueue::addIfReady(const O3DynInstPtr &inst) +{ + // If the instruction now has all of its source registers + // available, then add it to the list of ready instructions. + if (inst->readyToIssue()) { + + //Add the instruction to the proper ready list. + if (inst->isMemRef()) { + + DPRINTF(IQ, "Checking if memory instruction can issue.\n"); + + // Message to the mem dependence unit that this instruction has + // its registers ready. 
+ memDepUnit[inst->threadNumber].regsReady(inst); + + return; + } + + OpClass op_class = inst->opClass(); + + DPRINTF(IQ, "Instruction is ready to issue, putting it onto " + "the ready list, PC %s opclass:%i [sn:%llu].\n", + inst->pcState(), op_class, inst->seqNum); + + readyInsts[op_class].push(inst); + + // Will need to reorder the list if either a queue is not on the list, + // or it has an older instruction than last time. + if (!queueOnList[op_class]) { + addToOrderList(op_class); + } else if (readyInsts[op_class].top()->seqNum < + (*readyIt[op_class]).oldestInst) { + listOrder.erase(readyIt[op_class]); + addToOrderList(op_class); + } + } +} + +int +InstructionQueue::countInsts() +{ + return numEntries - freeEntries; +} + +void +InstructionQueue::dumpLists() +{ + for (int i = 0; i < Num_OpClasses; ++i) { + cprintf("Ready list %i size: %i\n", i, readyInsts[i].size()); + + cprintf("\n"); + } + + cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); + + NonSpecMapIt non_spec_it = nonSpecInsts.begin(); + NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); + + cprintf("Non speculative list: "); + + while (non_spec_it != non_spec_end_it) { + cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(), + (*non_spec_it).second->seqNum); + ++non_spec_it; + } + + cprintf("\n"); + + ListOrderIt list_order_it = listOrder.begin(); + ListOrderIt list_order_end_it = listOrder.end(); + int i = 1; + + cprintf("List order: "); + + while (list_order_it != list_order_end_it) { + cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType, + (*list_order_it).oldestInst); + + ++list_order_it; + ++i; + } + + cprintf("\n"); +} + + +void +InstructionQueue::dumpInsts() +{ + for (ThreadID tid = 0; tid < numThreads; ++tid) { + int num = 0; + int valid_num = 0; + ListIt inst_list_it = instList[tid].begin(); + + while (inst_list_it != instList[tid].end()) { + cprintf("Instruction:%i\n", num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + 
++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone()) { + // Loads that have not been marked as executed + // still count towards the total instructions. + ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->pcState(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } + } + + cprintf("Insts to Execute list:\n"); + + int num = 0; + int valid_num = 0; + ListIt inst_list_it = instsToExecute.begin(); + + while (inst_list_it != instsToExecute.end()) + { + cprintf("Instruction:%i\n", + num); + if (!(*inst_list_it)->isSquashed()) { + if (!(*inst_list_it)->isIssued()) { + ++valid_num; + cprintf("Count:%i\n", valid_num); + } else if ((*inst_list_it)->isMemRef() && + !(*inst_list_it)->memOpDone()) { + // Loads that have not been marked as executed + // still count towards the total instructions. 
+ ++valid_num; + cprintf("Count:%i\n", valid_num); + } + } + + cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n" + "Issued:%i\nSquashed:%i\n", + (*inst_list_it)->pcState(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + + if ((*inst_list_it)->isMemRef()) { + cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); + } + + cprintf("\n"); + + inst_list_it++; + ++num; + } +} diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 87c7981224..a545cba38d 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -53,6 +53,7 @@ #include "cpu/o3/comm.hh" #include "cpu/o3/dep_graph.hh" #include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/impl.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/mem_dep_unit.hh" #include "cpu/o3/store_set.hh" @@ -88,7 +89,6 @@ class FullO3CPU; * have the execute() function called on it. * @todo: Make IQ able to handle multiple FU pools. */ -template class InstructionQueue { public: @@ -106,7 +106,7 @@ class InstructionQueue int fuIdx; /** Pointer back to the instruction queue. */ - InstructionQueue *iqPtr; + InstructionQueue *iqPtr; /** Should the FU be added to the list to be freed upon * completing this event. @@ -116,7 +116,7 @@ class InstructionQueue public: /** Construct a FU completion event. */ FUCompletion(const O3DynInstPtr &_inst, int fu_idx, - InstructionQueue *iq_ptr); + InstructionQueue *iq_ptr); virtual void process(); virtual const char *description() const; @@ -124,8 +124,8 @@ class InstructionQueue }; /** Constructs an IQ. */ - InstructionQueue(FullO3CPU *cpu_ptr, DefaultIEW *iew_ptr, - const DerivO3CPUParams &params); + InstructionQueue(FullO3CPU *cpu_ptr, + DefaultIEW *iew_ptr, const DerivO3CPUParams &params); /** Destructs the IQ. */ ~InstructionQueue(); @@ -281,13 +281,13 @@ class InstructionQueue ///////////////////////// /** Pointer to the CPU. */ - FullO3CPU *cpu; + FullO3CPU *cpu; /** Cache interface. 
*/ MemInterface *dcacheInterface; /** Pointer to IEW stage. */ - DefaultIEW *iewStage; + DefaultIEW *iewStage; /** The memory dependence unit, which tracks/predicts memory dependences * between instructions. @@ -478,7 +478,7 @@ class InstructionQueue struct IQStats : public Stats::Group { - IQStats(FullO3CPU *cpu, const unsigned &total_width); + IQStats(FullO3CPU *cpu, const unsigned &total_width); /** Stat for number of instructions added. */ Stats::Scalar instsAdded; /** Stat for number of non-speculative instructions added. */ diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh deleted file mode 100644 index e521fc13a8..0000000000 --- a/src/cpu/o3/inst_queue_impl.hh +++ /dev/null @@ -1,1623 +0,0 @@ -/* - * Copyright (c) 2011-2014, 2017-2020 ARM Limited - * Copyright (c) 2013 Advanced Micro Devices, Inc. - * All rights reserved. - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder. You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __CPU_O3_INST_QUEUE_IMPL_HH__ -#define __CPU_O3_INST_QUEUE_IMPL_HH__ - -#include -#include - -#include "base/logging.hh" -#include "cpu/o3/dyn_inst.hh" -#include "cpu/o3/fu_pool.hh" -#include "cpu/o3/inst_queue.hh" -#include "cpu/o3/limits.hh" -#include "debug/IQ.hh" -#include "enums/OpClass.hh" -#include "params/DerivO3CPU.hh" -#include "sim/core.hh" - -// clang complains about std::set being overloaded with Packet::set if -// we open up the entire namespace std -using std::list; - -template -InstructionQueue::FUCompletion::FUCompletion(const O3DynInstPtr &_inst, - int fu_idx, InstructionQueue *iq_ptr) - : Event(Stat_Event_Pri, AutoDelete), - inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) -{ -} - -template -void -InstructionQueue::FUCompletion::process() -{ - iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1); - inst = NULL; -} - - -template -const char * -InstructionQueue::FUCompletion::description() const -{ - return "Functional unit completion"; -} - -template -InstructionQueue::InstructionQueue(FullO3CPU *cpu_ptr, - DefaultIEW *iew_ptr, const DerivO3CPUParams &params) - : cpu(cpu_ptr), - iewStage(iew_ptr), - fuPool(params.fuPool), - iqPolicy(params.smtIQPolicy), - numThreads(params.numThreads), - numEntries(params.numIQEntries), - totalWidth(params.issueWidth), - commitToIEWDelay(params.commitToIEWDelay), - iqStats(cpu, totalWidth), - iqIOStats(cpu) -{ - assert(fuPool); - - // Set the number of total physical registers - // As the vector registers have two addressing modes, they are added twice - numPhysRegs = params.numPhysIntRegs + params.numPhysFloatRegs + - params.numPhysVecRegs + - params.numPhysVecRegs * TheISA::NumVecElemPerVecReg + - params.numPhysVecPredRegs + - params.numPhysCCRegs; - - //Create an entry for each physical register within the - //dependency graph. - dependGraph.resize(numPhysRegs); - - // Resize the register scoreboard. 
- regScoreboard.resize(numPhysRegs); - - //Initialize Mem Dependence Units - for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { - memDepUnit[tid].init(params, tid, cpu_ptr); - memDepUnit[tid].setIQ(this); - } - - resetState(); - - //Figure out resource sharing policy - if (iqPolicy == SMTQueuePolicy::Dynamic) { - //Set Max Entries to Total ROB Capacity - for (ThreadID tid = 0; tid < numThreads; tid++) { - maxEntries[tid] = numEntries; - } - - } else if (iqPolicy == SMTQueuePolicy::Partitioned) { - //@todo:make work if part_amt doesnt divide evenly. - int part_amt = numEntries / numThreads; - - //Divide ROB up evenly - for (ThreadID tid = 0; tid < numThreads; tid++) { - maxEntries[tid] = part_amt; - } - - DPRINTF(IQ, "IQ sharing policy set to Partitioned:" - "%i entries per thread.\n",part_amt); - } else if (iqPolicy == SMTQueuePolicy::Threshold) { - double threshold = (double)params.smtIQThreshold / 100; - - int thresholdIQ = (int)((double)threshold * numEntries); - - //Divide up by threshold amount - for (ThreadID tid = 0; tid < numThreads; tid++) { - maxEntries[tid] = thresholdIQ; - } - - DPRINTF(IQ, "IQ sharing policy set to Threshold:" - "%i entries per thread.\n",thresholdIQ); - } - for (ThreadID tid = numThreads; tid < O3MaxThreads; tid++) { - maxEntries[tid] = 0; - } -} - -template -InstructionQueue::~InstructionQueue() -{ - dependGraph.reset(); -#ifdef DEBUG - cprintf("Nodes traversed: %i, removed: %i\n", - dependGraph.nodesTraversed, dependGraph.nodesRemoved); -#endif -} - -template -std::string -InstructionQueue::name() const -{ - return cpu->name() + ".iq"; -} - -template -InstructionQueue:: -IQStats::IQStats(FullO3CPU *cpu, const unsigned &total_width) - : Stats::Group(cpu), - ADD_STAT(instsAdded, Stats::Units::Count::get(), - "Number of instructions added to the IQ (excludes non-spec)"), - ADD_STAT(nonSpecInstsAdded, Stats::Units::Count::get(), - "Number of non-speculative instructions added to the IQ"), - ADD_STAT(instsIssued, 
Stats::Units::Count::get(), - "Number of instructions issued"), - ADD_STAT(intInstsIssued, Stats::Units::Count::get(), - "Number of integer instructions issued"), - ADD_STAT(floatInstsIssued, Stats::Units::Count::get(), - "Number of float instructions issued"), - ADD_STAT(branchInstsIssued, Stats::Units::Count::get(), - "Number of branch instructions issued"), - ADD_STAT(memInstsIssued, Stats::Units::Count::get(), - "Number of memory instructions issued"), - ADD_STAT(miscInstsIssued, Stats::Units::Count::get(), - "Number of miscellaneous instructions issued"), - ADD_STAT(squashedInstsIssued, Stats::Units::Count::get(), - "Number of squashed instructions issued"), - ADD_STAT(squashedInstsExamined, Stats::Units::Count::get(), - "Number of squashed instructions iterated over during squash; " - "mainly for profiling"), - ADD_STAT(squashedOperandsExamined, Stats::Units::Count::get(), - "Number of squashed operands that are examined and possibly " - "removed from graph"), - ADD_STAT(squashedNonSpecRemoved, Stats::Units::Count::get(), - "Number of squashed non-spec instructions that were removed"), - ADD_STAT(numIssuedDist, Stats::Units::Count::get(), - "Number of insts issued each cycle"), - ADD_STAT(statFuBusy, Stats::Units::Count::get(), - "attempts to use FU when none available"), - ADD_STAT(statIssuedInstType, Stats::Units::Count::get(), - "Number of instructions issued per FU type, per thread"), - ADD_STAT(issueRate, Stats::Units::Rate< - Stats::Units::Count, Stats::Units::Cycle>::get(), - "Inst issue rate", instsIssued / cpu->baseStats.numCycles), - ADD_STAT(fuBusy, Stats::Units::Count::get(), "FU busy when requested"), - ADD_STAT(fuBusyRate, Stats::Units::Rate< - Stats::Units::Count, Stats::Units::Count>::get(), - "FU busy rate (busy events/executed inst)") -{ - instsAdded - .prereq(instsAdded); - - nonSpecInstsAdded - .prereq(nonSpecInstsAdded); - - instsIssued - .prereq(instsIssued); - - intInstsIssued - .prereq(intInstsIssued); - - floatInstsIssued - 
.prereq(floatInstsIssued); - - branchInstsIssued - .prereq(branchInstsIssued); - - memInstsIssued - .prereq(memInstsIssued); - - miscInstsIssued - .prereq(miscInstsIssued); - - squashedInstsIssued - .prereq(squashedInstsIssued); - - squashedInstsExamined - .prereq(squashedInstsExamined); - - squashedOperandsExamined - .prereq(squashedOperandsExamined); - - squashedNonSpecRemoved - .prereq(squashedNonSpecRemoved); -/* - queueResDist - .init(Num_OpClasses, 0, 99, 2) - .name(name() + ".IQ:residence:") - .desc("cycles from dispatch to issue") - .flags(total | pdf | cdf ) - ; - for (int i = 0; i < Num_OpClasses; ++i) { - queueResDist.subname(i, opClassStrings[i]); - } -*/ - numIssuedDist - .init(0,total_width,1) - .flags(Stats::pdf) - ; -/* - dist_unissued - .init(Num_OpClasses+2) - .name(name() + ".unissued_cause") - .desc("Reason ready instruction not issued") - .flags(pdf | dist) - ; - for (int i=0; i < (Num_OpClasses + 2); ++i) { - dist_unissued.subname(i, unissued_names[i]); - } -*/ - statIssuedInstType - .init(cpu->numThreads,Enums::Num_OpClass) - .flags(Stats::total | Stats::pdf | Stats::dist) - ; - statIssuedInstType.ysubnames(Enums::OpClassStrings); - - // - // How long did instructions for a particular FU type wait prior to issue - // -/* - issueDelayDist - .init(Num_OpClasses,0,99,2) - .name(name() + ".") - .desc("cycles from operands ready to issue") - .flags(pdf | cdf) - ; - for (int i=0; inumThreads) - .flags(Stats::total) - ; - - fuBusyRate - .flags(Stats::total) - ; - fuBusyRate = fuBusy / instsIssued; -} - -template -InstructionQueue:: -IQIOStats::IQIOStats(Stats::Group *parent) - : Stats::Group(parent), - ADD_STAT(intInstQueueReads, Stats::Units::Count::get(), - "Number of integer instruction queue reads"), - ADD_STAT(intInstQueueWrites, Stats::Units::Count::get(), - "Number of integer instruction queue writes"), - ADD_STAT(intInstQueueWakeupAccesses, Stats::Units::Count::get(), - "Number of integer instruction queue wakeup accesses"), - 
ADD_STAT(fpInstQueueReads, Stats::Units::Count::get(), - "Number of floating instruction queue reads"), - ADD_STAT(fpInstQueueWrites, Stats::Units::Count::get(), - "Number of floating instruction queue writes"), - ADD_STAT(fpInstQueueWakeupAccesses, Stats::Units::Count::get(), - "Number of floating instruction queue wakeup accesses"), - ADD_STAT(vecInstQueueReads, Stats::Units::Count::get(), - "Number of vector instruction queue reads"), - ADD_STAT(vecInstQueueWrites, Stats::Units::Count::get(), - "Number of vector instruction queue writes"), - ADD_STAT(vecInstQueueWakeupAccesses, Stats::Units::Count::get(), - "Number of vector instruction queue wakeup accesses"), - ADD_STAT(intAluAccesses, Stats::Units::Count::get(), - "Number of integer alu accesses"), - ADD_STAT(fpAluAccesses, Stats::Units::Count::get(), - "Number of floating point alu accesses"), - ADD_STAT(vecAluAccesses, Stats::Units::Count::get(), - "Number of vector alu accesses") -{ - using namespace Stats; - intInstQueueReads - .flags(total); - - intInstQueueWrites - .flags(total); - - intInstQueueWakeupAccesses - .flags(total); - - fpInstQueueReads - .flags(total); - - fpInstQueueWrites - .flags(total); - - fpInstQueueWakeupAccesses - .flags(total); - - vecInstQueueReads - .flags(total); - - vecInstQueueWrites - .flags(total); - - vecInstQueueWakeupAccesses - .flags(total); - - intAluAccesses - .flags(total); - - fpAluAccesses - .flags(total); - - vecAluAccesses - .flags(total); -} - -template -void -InstructionQueue::resetState() -{ - //Initialize thread IQ counts - for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { - count[tid] = 0; - instList[tid].clear(); - } - - // Initialize the number of free IQ entries. - freeEntries = numEntries; - - // Note that in actuality, the registers corresponding to the logical - // registers start off as ready. However this doesn't matter for the - // IQ as the instruction should have been correctly told if those - // registers are ready in rename. 
Thus it can all be initialized as - // unready. - for (int i = 0; i < numPhysRegs; ++i) { - regScoreboard[i] = false; - } - - for (ThreadID tid = 0; tid < O3MaxThreads; ++tid) { - squashedSeqNum[tid] = 0; - } - - for (int i = 0; i < Num_OpClasses; ++i) { - while (!readyInsts[i].empty()) - readyInsts[i].pop(); - queueOnList[i] = false; - readyIt[i] = listOrder.end(); - } - nonSpecInsts.clear(); - listOrder.clear(); - deferredMemInsts.clear(); - blockedMemInsts.clear(); - retryMemInsts.clear(); - wbOutstanding = 0; -} - -template -void -InstructionQueue::setActiveThreads(list *at_ptr) -{ - activeThreads = at_ptr; -} - -template -void -InstructionQueue::setIssueToExecuteQueue( - TimeBuffer *i2e_ptr) -{ - issueToExecuteQueue = i2e_ptr; -} - -template -void -InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) -{ - timeBuffer = tb_ptr; - - fromCommit = timeBuffer->getWire(-commitToIEWDelay); -} - -template -bool -InstructionQueue::isDrained() const -{ - bool drained = dependGraph.empty() && - instsToExecute.empty() && - wbOutstanding == 0; - for (ThreadID tid = 0; tid < numThreads; ++tid) - drained = drained && memDepUnit[tid].isDrained(); - - return drained; -} - -template -void -InstructionQueue::drainSanityCheck() const -{ - assert(dependGraph.empty()); - assert(instsToExecute.empty()); - for (ThreadID tid = 0; tid < numThreads; ++tid) - memDepUnit[tid].drainSanityCheck(); -} - -template -void -InstructionQueue::takeOverFrom() -{ - resetState(); -} - -template -int -InstructionQueue::entryAmount(ThreadID num_threads) -{ - if (iqPolicy == SMTQueuePolicy::Partitioned) { - return numEntries / num_threads; - } else { - return 0; - } -} - - -template -void -InstructionQueue::resetEntries() -{ - if (iqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) { - int active_threads = activeThreads->size(); - - list::iterator threads = activeThreads->begin(); - list::iterator end = activeThreads->end(); - - while (threads != end) { - ThreadID tid = *threads++; - - if (iqPolicy == 
SMTQueuePolicy::Partitioned) { - maxEntries[tid] = numEntries / active_threads; - } else if (iqPolicy == SMTQueuePolicy::Threshold && - active_threads == 1) { - maxEntries[tid] = numEntries; - } - } - } -} - -template -unsigned -InstructionQueue::numFreeEntries() -{ - return freeEntries; -} - -template -unsigned -InstructionQueue::numFreeEntries(ThreadID tid) -{ - return maxEntries[tid] - count[tid]; -} - -// Might want to do something more complex if it knows how many instructions -// will be issued this cycle. -template -bool -InstructionQueue::isFull() -{ - if (freeEntries == 0) { - return(true); - } else { - return(false); - } -} - -template -bool -InstructionQueue::isFull(ThreadID tid) -{ - if (numFreeEntries(tid) == 0) { - return(true); - } else { - return(false); - } -} - -template -bool -InstructionQueue::hasReadyInsts() -{ - if (!listOrder.empty()) { - return true; - } - - for (int i = 0; i < Num_OpClasses; ++i) { - if (!readyInsts[i].empty()) { - return true; - } - } - - return false; -} - -template -void -InstructionQueue::insert(const O3DynInstPtr &new_inst) -{ - if (new_inst->isFloating()) { - iqIOStats.fpInstQueueWrites++; - } else if (new_inst->isVector()) { - iqIOStats.vecInstQueueWrites++; - } else { - iqIOStats.intInstQueueWrites++; - } - // Make sure the instruction is valid - assert(new_inst); - - DPRINTF(IQ, "Adding instruction [sn:%llu] PC %s to the IQ.\n", - new_inst->seqNum, new_inst->pcState()); - - assert(freeEntries != 0); - - instList[new_inst->threadNumber].push_back(new_inst); - - --freeEntries; - - new_inst->setInIQ(); - - // Look through its source registers (physical regs), and mark any - // dependencies. - addToDependents(new_inst); - - // Have this instruction set itself as the producer of its destination - // register(s). 
- addToProducers(new_inst); - - if (new_inst->isMemRef()) { - memDepUnit[new_inst->threadNumber].insert(new_inst); - } else { - addIfReady(new_inst); - } - - ++iqStats.instsAdded; - - count[new_inst->threadNumber]++; - - assert(freeEntries == (numEntries - countInsts())); -} - -template -void -InstructionQueue::insertNonSpec(const O3DynInstPtr &new_inst) -{ - // @todo: Clean up this code; can do it by setting inst as unable - // to issue, then calling normal insert on the inst. - if (new_inst->isFloating()) { - iqIOStats.fpInstQueueWrites++; - } else if (new_inst->isVector()) { - iqIOStats.vecInstQueueWrites++; - } else { - iqIOStats.intInstQueueWrites++; - } - - assert(new_inst); - - nonSpecInsts[new_inst->seqNum] = new_inst; - - DPRINTF(IQ, "Adding non-speculative instruction [sn:%llu] PC %s " - "to the IQ.\n", - new_inst->seqNum, new_inst->pcState()); - - assert(freeEntries != 0); - - instList[new_inst->threadNumber].push_back(new_inst); - - --freeEntries; - - new_inst->setInIQ(); - - // Have this instruction set itself as the producer of its destination - // register(s). - addToProducers(new_inst); - - // If it's a memory instruction, add it to the memory dependency - // unit. 
- if (new_inst->isMemRef()) { - memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst); - } - - ++iqStats.nonSpecInstsAdded; - - count[new_inst->threadNumber]++; - - assert(freeEntries == (numEntries - countInsts())); -} - -template -void -InstructionQueue::insertBarrier(const O3DynInstPtr &barr_inst) -{ - memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst); - - insertNonSpec(barr_inst); -} - -template -O3DynInstPtr -InstructionQueue::getInstToExecute() -{ - assert(!instsToExecute.empty()); - O3DynInstPtr inst = std::move(instsToExecute.front()); - instsToExecute.pop_front(); - if (inst->isFloating()) { - iqIOStats.fpInstQueueReads++; - } else if (inst->isVector()) { - iqIOStats.vecInstQueueReads++; - } else { - iqIOStats.intInstQueueReads++; - } - return inst; -} - -template -void -InstructionQueue::addToOrderList(OpClass op_class) -{ - assert(!readyInsts[op_class].empty()); - - ListOrderEntry queue_entry; - - queue_entry.queueType = op_class; - - queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; - - ListOrderIt list_it = listOrder.begin(); - ListOrderIt list_end_it = listOrder.end(); - - while (list_it != list_end_it) { - if ((*list_it).oldestInst > queue_entry.oldestInst) { - break; - } - - list_it++; - } - - readyIt[op_class] = listOrder.insert(list_it, queue_entry); - queueOnList[op_class] = true; -} - -template -void -InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it) -{ - // Get iterator of next item on the list - // Delete the original iterator - // Determine if the next item is either the end of the list or younger - // than the new instruction. If so, then add in a new iterator right here. - // If not, then move along. 
- ListOrderEntry queue_entry; - OpClass op_class = (*list_order_it).queueType; - ListOrderIt next_it = list_order_it; - - ++next_it; - - queue_entry.queueType = op_class; - queue_entry.oldestInst = readyInsts[op_class].top()->seqNum; - - while (next_it != listOrder.end() && - (*next_it).oldestInst < queue_entry.oldestInst) { - ++next_it; - } - - readyIt[op_class] = listOrder.insert(next_it, queue_entry); -} - -template -void -InstructionQueue::processFUCompletion( - const O3DynInstPtr &inst, int fu_idx) -{ - DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum); - assert(!cpu->switchedOut()); - // The CPU could have been sleeping until this op completed (*extremely* - // long latency op). Wake it if it was. This may be overkill. - --wbOutstanding; - iewStage->wakeCPU(); - - if (fu_idx > -1) - fuPool->freeUnitNextCycle(fu_idx); - - // @todo: Ensure that these FU Completions happen at the beginning - // of a cycle, otherwise they could add too many instructions to - // the queue. - issueToExecuteQueue->access(-1)->size++; - instsToExecute.push_back(inst); -} - -// @todo: Figure out a better way to remove the squashed items from the -// lists. Checking the top item of each list to see if it's squashed -// wastes time and forces jumps. -template -void -InstructionQueue::scheduleReadyInsts() -{ - DPRINTF(IQ, "Attempting to schedule ready instructions from " - "the IQ.\n"); - - O3Comm::IssueStruct *i2e_info = issueToExecuteQueue->access(0); - - O3DynInstPtr mem_inst; - while ((mem_inst = std::move(getDeferredMemInstToExecute()))) { - addReadyMemInst(mem_inst); - } - - // See if any cache blocked instructions are able to be executed - while ((mem_inst = std::move(getBlockedMemInstToExecute()))) { - addReadyMemInst(mem_inst); - } - - // Have iterator to head of the list - // While I haven't exceeded bandwidth or reached the end of the list, - // Try to get a FU that can do what this op needs. 
- // If successful, change the oldestInst to the new top of the list, put - // the queue in the proper place in the list. - // Increment the iterator. - // This will avoid trying to schedule a certain op class if there are no - // FUs that handle it. - int total_issued = 0; - ListOrderIt order_it = listOrder.begin(); - ListOrderIt order_end_it = listOrder.end(); - - while (total_issued < totalWidth && order_it != order_end_it) { - OpClass op_class = (*order_it).queueType; - - assert(!readyInsts[op_class].empty()); - - O3DynInstPtr issuing_inst = readyInsts[op_class].top(); - - if (issuing_inst->isFloating()) { - iqIOStats.fpInstQueueReads++; - } else if (issuing_inst->isVector()) { - iqIOStats.vecInstQueueReads++; - } else { - iqIOStats.intInstQueueReads++; - } - - assert(issuing_inst->seqNum == (*order_it).oldestInst); - - if (issuing_inst->isSquashed()) { - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - listOrder.erase(order_it++); - - ++iqStats.squashedInstsIssued; - - continue; - } - - int idx = FUPool::NoCapableFU; - Cycles op_latency = Cycles(1); - ThreadID tid = issuing_inst->threadNumber; - - if (op_class != No_OpClass) { - idx = fuPool->getUnit(op_class); - if (issuing_inst->isFloating()) { - iqIOStats.fpAluAccesses++; - } else if (issuing_inst->isVector()) { - iqIOStats.vecAluAccesses++; - } else { - iqIOStats.intAluAccesses++; - } - if (idx > FUPool::NoFreeFU) { - op_latency = fuPool->getOpLatency(op_class); - } - } - - // If we have an instruction that doesn't require a FU, or a - // valid FU, then schedule for execution. - if (idx != FUPool::NoFreeFU) { - if (op_latency == Cycles(1)) { - i2e_info->size++; - instsToExecute.push_back(issuing_inst); - - // Add the FU onto the list of FU's to be freed next - // cycle if we used one. 
- if (idx >= 0) - fuPool->freeUnitNextCycle(idx); - } else { - bool pipelined = fuPool->isPipelined(op_class); - // Generate completion event for the FU - ++wbOutstanding; - FUCompletion *execution = new FUCompletion(issuing_inst, - idx, this); - - cpu->schedule(execution, - cpu->clockEdge(Cycles(op_latency - 1))); - - if (!pipelined) { - // If FU isn't pipelined, then it must be freed - // upon the execution completing. - execution->setFreeFU(); - } else { - // Add the FU onto the list of FU's to be freed next cycle. - fuPool->freeUnitNextCycle(idx); - } - } - - DPRINTF(IQ, "Thread %i: Issuing instruction PC %s " - "[sn:%llu]\n", - tid, issuing_inst->pcState(), - issuing_inst->seqNum); - - readyInsts[op_class].pop(); - - if (!readyInsts[op_class].empty()) { - moveToYoungerInst(order_it); - } else { - readyIt[op_class] = listOrder.end(); - queueOnList[op_class] = false; - } - - issuing_inst->setIssued(); - ++total_issued; - -#if TRACING_ON - issuing_inst->issueTick = curTick() - issuing_inst->fetchTick; -#endif - - if (!issuing_inst->isMemRef()) { - // Memory instructions can not be freed from the IQ until they - // complete. - ++freeEntries; - count[tid]--; - issuing_inst->clearInIQ(); - } else { - memDepUnit[tid].issue(issuing_inst); - } - - listOrder.erase(order_it++); - iqStats.statIssuedInstType[tid][op_class]++; - } else { - iqStats.statFuBusy[op_class]++; - iqStats.fuBusy[tid]++; - ++order_it; - } - } - - iqStats.numIssuedDist.sample(total_issued); - iqStats.instsIssued+= total_issued; - - // If we issued any instructions, tell the CPU we had activity. - // @todo If the way deferred memory instructions are handeled due to - // translation changes then the deferredMemInsts condition should be removed - // from the code below. 
- if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) { - cpu->activityThisCycle(); - } else { - DPRINTF(IQ, "Not able to schedule any instructions.\n"); - } -} - -template -void -InstructionQueue::scheduleNonSpec(const InstSeqNum &inst) -{ - DPRINTF(IQ, "Marking nonspeculative instruction [sn:%llu] as ready " - "to execute.\n", inst); - - NonSpecMapIt inst_it = nonSpecInsts.find(inst); - - assert(inst_it != nonSpecInsts.end()); - - ThreadID tid = (*inst_it).second->threadNumber; - - (*inst_it).second->setAtCommit(); - - (*inst_it).second->setCanIssue(); - - if (!(*inst_it).second->isMemRef()) { - addIfReady((*inst_it).second); - } else { - memDepUnit[tid].nonSpecInstReady((*inst_it).second); - } - - (*inst_it).second = NULL; - - nonSpecInsts.erase(inst_it); -} - -template -void -InstructionQueue::commit(const InstSeqNum &inst, ThreadID tid) -{ - DPRINTF(IQ, "[tid:%i] Committing instructions older than [sn:%llu]\n", - tid,inst); - - ListIt iq_it = instList[tid].begin(); - - while (iq_it != instList[tid].end() && - (*iq_it)->seqNum <= inst) { - ++iq_it; - instList[tid].pop_front(); - } - - assert(freeEntries == (numEntries - countInsts())); -} - -template -int -InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst) -{ - int dependents = 0; - - // The instruction queue here takes care of both floating and int ops - if (completed_inst->isFloating()) { - iqIOStats.fpInstQueueWakeupAccesses++; - } else if (completed_inst->isVector()) { - iqIOStats.vecInstQueueWakeupAccesses++; - } else { - iqIOStats.intInstQueueWakeupAccesses++; - } - - DPRINTF(IQ, "Waking dependents of completed instruction.\n"); - - assert(!completed_inst->isSquashed()); - - // Tell the memory dependence unit to wake any dependents on this - // instruction if it is a memory instruction. Also complete the memory - // instruction at this point since we know it executed without issues. 
- ThreadID tid = completed_inst->threadNumber; - if (completed_inst->isMemRef()) { - memDepUnit[tid].completeInst(completed_inst); - - DPRINTF(IQ, "Completing mem instruction PC: %s [sn:%llu]\n", - completed_inst->pcState(), completed_inst->seqNum); - - ++freeEntries; - completed_inst->memOpDone(true); - count[tid]--; - } else if (completed_inst->isReadBarrier() || - completed_inst->isWriteBarrier()) { - // Completes a non mem ref barrier - memDepUnit[tid].completeInst(completed_inst); - } - - for (int dest_reg_idx = 0; - dest_reg_idx < completed_inst->numDestRegs(); - dest_reg_idx++) - { - PhysRegIdPtr dest_reg = - completed_inst->regs.renamedDestIdx(dest_reg_idx); - - // Special case of uniq or control registers. They are not - // handled by the IQ and thus have no dependency graph entry. - if (dest_reg->isFixedMapping()) { - DPRINTF(IQ, "Reg %d [%s] is part of a fix mapping, skipping\n", - dest_reg->index(), dest_reg->className()); - continue; - } - - // Avoid waking up dependents if the register is pinned - dest_reg->decrNumPinnedWritesToComplete(); - if (dest_reg->isPinned()) - completed_inst->setPinnedRegsWritten(); - - if (dest_reg->getNumPinnedWritesToComplete() != 0) { - DPRINTF(IQ, "Reg %d [%s] is pinned, skipping\n", - dest_reg->index(), dest_reg->className()); - continue; - } - - DPRINTF(IQ, "Waking any dependents on register %i (%s).\n", - dest_reg->index(), - dest_reg->className()); - - //Go through the dependency chain, marking the registers as - //ready within the waiting instructions. - O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex()); - - while (dep_inst) { - DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] " - "PC %s.\n", dep_inst->seqNum, dep_inst->pcState()); - - // Might want to give more information to the instruction - // so that it knows which of its source registers is - // ready. However that would mean that the dependency - // graph entries would need to hold the src_reg_idx. 
- dep_inst->markSrcRegReady(); - - addIfReady(dep_inst); - - dep_inst = dependGraph.pop(dest_reg->flatIndex()); - - ++dependents; - } - - // Reset the head node now that all of its dependents have - // been woken up. - assert(dependGraph.empty(dest_reg->flatIndex())); - dependGraph.clearInst(dest_reg->flatIndex()); - - // Mark the scoreboard as having that register ready. - regScoreboard[dest_reg->flatIndex()] = true; - } - return dependents; -} - -template -void -InstructionQueue::addReadyMemInst(const O3DynInstPtr &ready_inst) -{ - OpClass op_class = ready_inst->opClass(); - - readyInsts[op_class].push(ready_inst); - - // Will need to reorder the list if either a queue is not on the list, - // or it has an older instruction than last time. - if (!queueOnList[op_class]) { - addToOrderList(op_class); - } else if (readyInsts[op_class].top()->seqNum < - (*readyIt[op_class]).oldestInst) { - listOrder.erase(readyIt[op_class]); - addToOrderList(op_class); - } - - DPRINTF(IQ, "Instruction is ready to issue, putting it onto " - "the ready list, PC %s opclass:%i [sn:%llu].\n", - ready_inst->pcState(), op_class, ready_inst->seqNum); -} - -template -void -InstructionQueue::rescheduleMemInst(const O3DynInstPtr &resched_inst) -{ - DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum); - - // Reset DTB translation state - resched_inst->translationStarted(false); - resched_inst->translationCompleted(false); - - resched_inst->clearCanIssue(); - memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); -} - -template -void -InstructionQueue::replayMemInst(const O3DynInstPtr &replay_inst) -{ - memDepUnit[replay_inst->threadNumber].replay(); -} - -template -void -InstructionQueue::deferMemInst(const O3DynInstPtr &deferred_inst) -{ - deferredMemInsts.push_back(deferred_inst); -} - -template -void -InstructionQueue::blockMemInst(const O3DynInstPtr &blocked_inst) -{ - blocked_inst->clearIssued(); - blocked_inst->clearCanIssue(); - 
blockedMemInsts.push_back(blocked_inst); -} - -template -void -InstructionQueue::cacheUnblocked() -{ - retryMemInsts.splice(retryMemInsts.end(), blockedMemInsts); - // Get the CPU ticking again - cpu->wakeCPU(); -} - -template -O3DynInstPtr -InstructionQueue::getDeferredMemInstToExecute() -{ - for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); - ++it) { - if ((*it)->translationCompleted() || (*it)->isSquashed()) { - O3DynInstPtr mem_inst = std::move(*it); - deferredMemInsts.erase(it); - return mem_inst; - } - } - return nullptr; -} - -template -O3DynInstPtr -InstructionQueue::getBlockedMemInstToExecute() -{ - if (retryMemInsts.empty()) { - return nullptr; - } else { - O3DynInstPtr mem_inst = std::move(retryMemInsts.front()); - retryMemInsts.pop_front(); - return mem_inst; - } -} - -template -void -InstructionQueue::violation(const O3DynInstPtr &store, - const O3DynInstPtr &faulting_load) -{ - iqIOStats.intInstQueueWrites++; - memDepUnit[store->threadNumber].violation(store, faulting_load); -} - -template -void -InstructionQueue::squash(ThreadID tid) -{ - DPRINTF(IQ, "[tid:%i] Starting to squash instructions in " - "the IQ.\n", tid); - - // Read instruction sequence number of last instruction out of the - // time buffer. - squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneSeqNum; - - doSquash(tid); - - // Also tell the memory dependence unit to squash. - memDepUnit[tid].squash(squashedSeqNum[tid], tid); -} - -template -void -InstructionQueue::doSquash(ThreadID tid) -{ - // Start at the tail. - ListIt squash_it = instList[tid].end(); - --squash_it; - - DPRINTF(IQ, "[tid:%i] Squashing until sequence number %i!\n", - tid, squashedSeqNum[tid]); - - // Squash any instructions younger than the squashed sequence number - // given. 
- while (squash_it != instList[tid].end() && - (*squash_it)->seqNum > squashedSeqNum[tid]) { - - O3DynInstPtr squashed_inst = (*squash_it); - if (squashed_inst->isFloating()) { - iqIOStats.fpInstQueueWrites++; - } else if (squashed_inst->isVector()) { - iqIOStats.vecInstQueueWrites++; - } else { - iqIOStats.intInstQueueWrites++; - } - - // Only handle the instruction if it actually is in the IQ and - // hasn't already been squashed in the IQ. - if (squashed_inst->threadNumber != tid || - squashed_inst->isSquashedInIQ()) { - --squash_it; - continue; - } - - if (!squashed_inst->isIssued() || - (squashed_inst->isMemRef() && - !squashed_inst->memOpDone())) { - - DPRINTF(IQ, "[tid:%i] Instruction [sn:%llu] PC %s squashed.\n", - tid, squashed_inst->seqNum, squashed_inst->pcState()); - - bool is_acq_rel = squashed_inst->isFullMemBarrier() && - (squashed_inst->isLoad() || - (squashed_inst->isStore() && - !squashed_inst->isStoreConditional())); - - // Remove the instruction from the dependency list. - if (is_acq_rel || - (!squashed_inst->isNonSpeculative() && - !squashed_inst->isStoreConditional() && - !squashed_inst->isAtomic() && - !squashed_inst->isReadBarrier() && - !squashed_inst->isWriteBarrier())) { - - for (int src_reg_idx = 0; - src_reg_idx < squashed_inst->numSrcRegs(); - src_reg_idx++) - { - PhysRegIdPtr src_reg = - squashed_inst->regs.renamedSrcIdx(src_reg_idx); - - // Only remove it from the dependency graph if it - // was placed there in the first place. - - // Instead of doing a linked list traversal, we - // can just remove these squashed instructions - // either at issue time, or when the register is - // overwritten. The only downside to this is it - // leaves more room for error. 
- - if (!squashed_inst->regs.readySrcIdx(src_reg_idx) && - !src_reg->isFixedMapping()) { - dependGraph.remove(src_reg->flatIndex(), - squashed_inst); - } - - ++iqStats.squashedOperandsExamined; - } - - } else if (!squashed_inst->isStoreConditional() || - !squashed_inst->isCompleted()) { - NonSpecMapIt ns_inst_it = - nonSpecInsts.find(squashed_inst->seqNum); - - // we remove non-speculative instructions from - // nonSpecInsts already when they are ready, and so we - // cannot always expect to find them - if (ns_inst_it == nonSpecInsts.end()) { - // loads that became ready but stalled on a - // blocked cache are alreayd removed from - // nonSpecInsts, and have not faulted - assert(squashed_inst->getFault() != NoFault || - squashed_inst->isMemRef()); - } else { - - (*ns_inst_it).second = NULL; - - nonSpecInsts.erase(ns_inst_it); - - ++iqStats.squashedNonSpecRemoved; - } - } - - // Might want to also clear out the head of the dependency graph. - - // Mark it as squashed within the IQ. - squashed_inst->setSquashedInIQ(); - - // @todo: Remove this hack where several statuses are set so the - // inst will flow through the rest of the pipeline. - squashed_inst->setIssued(); - squashed_inst->setCanCommit(); - squashed_inst->clearInIQ(); - - //Update Thread IQ Count - count[squashed_inst->threadNumber]--; - - ++freeEntries; - } - - // IQ clears out the heads of the dependency graph only when - // instructions reach writeback stage. If an instruction is squashed - // before writeback stage, its head of dependency graph would not be - // cleared out; it holds the instruction's O3DynInstPtr. This prevents - // freeing the squashed instruction's DynInst. - // Thus, we need to manually clear out the squashed instructions' heads - // of dependency graph. 
- for (int dest_reg_idx = 0; - dest_reg_idx < squashed_inst->numDestRegs(); - dest_reg_idx++) - { - PhysRegIdPtr dest_reg = - squashed_inst->regs.renamedDestIdx(dest_reg_idx); - if (dest_reg->isFixedMapping()){ - continue; - } - assert(dependGraph.empty(dest_reg->flatIndex())); - dependGraph.clearInst(dest_reg->flatIndex()); - } - instList[tid].erase(squash_it--); - ++iqStats.squashedInstsExamined; - } -} - -template -bool -InstructionQueue::PqCompare::operator()( - const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const -{ - return lhs->seqNum > rhs->seqNum; -} - -template -bool -InstructionQueue::addToDependents(const O3DynInstPtr &new_inst) -{ - // Loop through the instruction's source registers, adding - // them to the dependency list if they are not ready. - int8_t total_src_regs = new_inst->numSrcRegs(); - bool return_val = false; - - for (int src_reg_idx = 0; - src_reg_idx < total_src_regs; - src_reg_idx++) - { - // Only add it to the dependency graph if it's not ready. - if (!new_inst->regs.readySrcIdx(src_reg_idx)) { - PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx); - - // Check the IQ's scoreboard to make sure the register - // hasn't become ready while the instruction was in flight - // between stages. Only if it really isn't ready should - // it be added to the dependency graph. - if (src_reg->isFixedMapping()) { - continue; - } else if (!regScoreboard[src_reg->flatIndex()]) { - DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that " - "is being added to the dependency chain.\n", - new_inst->pcState(), src_reg->index(), - src_reg->className()); - - dependGraph.insert(src_reg->flatIndex(), new_inst); - - // Change the return value to indicate that something - // was added to the dependency graph. 
- return_val = true; - } else { - DPRINTF(IQ, "Instruction PC %s has src reg %i (%s) that " - "became ready before it reached the IQ.\n", - new_inst->pcState(), src_reg->index(), - src_reg->className()); - // Mark a register ready within the instruction. - new_inst->markSrcRegReady(src_reg_idx); - } - } - } - - return return_val; -} - -template -void -InstructionQueue::addToProducers(const O3DynInstPtr &new_inst) -{ - // Nothing really needs to be marked when an instruction becomes - // the producer of a register's value, but for convenience a ptr - // to the producing instruction will be placed in the head node of - // the dependency links. - int8_t total_dest_regs = new_inst->numDestRegs(); - - for (int dest_reg_idx = 0; - dest_reg_idx < total_dest_regs; - dest_reg_idx++) - { - PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx); - - // Some registers have fixed mapping, and there is no need to track - // dependencies as these instructions must be executed at commit. - if (dest_reg->isFixedMapping()) { - continue; - } - - if (!dependGraph.empty(dest_reg->flatIndex())) { - dependGraph.dump(); - panic("Dependency graph %i (%s) (flat: %i) not empty!", - dest_reg->index(), dest_reg->className(), - dest_reg->flatIndex()); - } - - dependGraph.setInst(dest_reg->flatIndex(), new_inst); - - // Mark the scoreboard to say it's not yet ready. - regScoreboard[dest_reg->flatIndex()] = false; - } -} - -template -void -InstructionQueue::addIfReady(const O3DynInstPtr &inst) -{ - // If the instruction now has all of its source registers - // available, then add it to the list of ready instructions. - if (inst->readyToIssue()) { - - //Add the instruction to the proper ready list. - if (inst->isMemRef()) { - - DPRINTF(IQ, "Checking if memory instruction can issue.\n"); - - // Message to the mem dependence unit that this instruction has - // its registers ready. 
- memDepUnit[inst->threadNumber].regsReady(inst); - - return; - } - - OpClass op_class = inst->opClass(); - - DPRINTF(IQ, "Instruction is ready to issue, putting it onto " - "the ready list, PC %s opclass:%i [sn:%llu].\n", - inst->pcState(), op_class, inst->seqNum); - - readyInsts[op_class].push(inst); - - // Will need to reorder the list if either a queue is not on the list, - // or it has an older instruction than last time. - if (!queueOnList[op_class]) { - addToOrderList(op_class); - } else if (readyInsts[op_class].top()->seqNum < - (*readyIt[op_class]).oldestInst) { - listOrder.erase(readyIt[op_class]); - addToOrderList(op_class); - } - } -} - -template -int -InstructionQueue::countInsts() -{ - return numEntries - freeEntries; -} - -template -void -InstructionQueue::dumpLists() -{ - for (int i = 0; i < Num_OpClasses; ++i) { - cprintf("Ready list %i size: %i\n", i, readyInsts[i].size()); - - cprintf("\n"); - } - - cprintf("Non speculative list size: %i\n", nonSpecInsts.size()); - - NonSpecMapIt non_spec_it = nonSpecInsts.begin(); - NonSpecMapIt non_spec_end_it = nonSpecInsts.end(); - - cprintf("Non speculative list: "); - - while (non_spec_it != non_spec_end_it) { - cprintf("%s [sn:%llu]", (*non_spec_it).second->pcState(), - (*non_spec_it).second->seqNum); - ++non_spec_it; - } - - cprintf("\n"); - - ListOrderIt list_order_it = listOrder.begin(); - ListOrderIt list_order_end_it = listOrder.end(); - int i = 1; - - cprintf("List order: "); - - while (list_order_it != list_order_end_it) { - cprintf("%i OpClass:%i [sn:%llu] ", i, (*list_order_it).queueType, - (*list_order_it).oldestInst); - - ++list_order_it; - ++i; - } - - cprintf("\n"); -} - - -template -void -InstructionQueue::dumpInsts() -{ - for (ThreadID tid = 0; tid < numThreads; ++tid) { - int num = 0; - int valid_num = 0; - ListIt inst_list_it = instList[tid].begin(); - - while (inst_list_it != instList[tid].end()) { - cprintf("Instruction:%i\n", num); - if (!(*inst_list_it)->isSquashed()) { - if 
(!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone()) { - // Loads that have not been marked as executed - // still count towards the total instructions. - ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->pcState(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } - } - - cprintf("Insts to Execute list:\n"); - - int num = 0; - int valid_num = 0; - ListIt inst_list_it = instsToExecute.begin(); - - while (inst_list_it != instsToExecute.end()) - { - cprintf("Instruction:%i\n", - num); - if (!(*inst_list_it)->isSquashed()) { - if (!(*inst_list_it)->isIssued()) { - ++valid_num; - cprintf("Count:%i\n", valid_num); - } else if ((*inst_list_it)->isMemRef() && - !(*inst_list_it)->memOpDone()) { - // Loads that have not been marked as executed - // still count towards the total instructions. 
- ++valid_num; - cprintf("Count:%i\n", valid_num); - } - } - - cprintf("PC: %s\n[sn:%llu]\n[tid:%i]\n" - "Issued:%i\nSquashed:%i\n", - (*inst_list_it)->pcState(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - - if ((*inst_list_it)->isMemRef()) { - cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone()); - } - - cprintf("\n"); - - inst_list_it++; - ++num; - } -} - -#endif//__CPU_O3_INST_QUEUE_IMPL_HH__ diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index 9e8abd03ee..9e3205ec60 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -1028,6 +1028,13 @@ LSQUnit::squash(const InstSeqNum &squashed_num) } } +uint64_t +LSQUnit::getLatestHtmUid() const +{ + const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr(); + return htm_cpt->getHtmUid(); +} + void LSQUnit::storePostSend() { @@ -1257,6 +1264,10 @@ LSQUnit::dumpInsts() const cprintf("\n"); } +void LSQUnit::schedule(Event& ev, Tick when) { cpu->schedule(ev, when); } + +BaseMMU *LSQUnit::getMMUPtr() { return cpu->mmu; } + unsigned int LSQUnit::cacheLineSize() { diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index 0ad7203189..ffd3955652 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -312,12 +312,9 @@ class LSQUnit int numHtmStarts() const { return htmStarts; } int numHtmStops() const { return htmStops; } void resetHtmStartsStops() { htmStarts = htmStops = 0; } - uint64_t getLatestHtmUid() const - { - const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr(); - return htm_cpt->getHtmUid(); - } - void setLastRetiredHtmUid(uint64_t htm_uid) + uint64_t getLatestHtmUid() const; + void + setLastRetiredHtmUid(uint64_t htm_uid) { assert(htm_uid >= lastRetiredHtmUid); lastRetiredHtmUid = htm_uid; @@ -393,9 +390,9 @@ class LSQUnit void dumpInsts() const; /** Schedule event for the cpu. 
*/ - void schedule(Event& ev, Tick when) { cpu->schedule(ev, when); } + void schedule(Event& ev, Tick when); - BaseMMU* getMMUPtr() { return cpu->mmu; } + BaseMMU *getMMUPtr(); private: /** Pointer to the CPU. */ diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 5f46cd7c8b..7ada849d57 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -144,7 +144,7 @@ MemDepUnit::takeOverFrom() } void -MemDepUnit::setIQ(InstructionQueue *iq_ptr) +MemDepUnit::setIQ(InstructionQueue *iq_ptr) { iqPtr = iq_ptr; } diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh index 21c5f71edf..24ef5055bc 100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@ -68,7 +68,6 @@ struct SNHash struct DerivO3CPUParams; -template class InstructionQueue; template @@ -117,7 +116,7 @@ class MemDepUnit void takeOverFrom(); /** Sets the pointer to the IQ. */ - void setIQ(InstructionQueue *iq_ptr); + void setIQ(InstructionQueue *iq_ptr); /** Inserts a memory instruction. */ void insert(const O3DynInstPtr &inst); @@ -258,7 +257,7 @@ class MemDepUnit void insertBarrierSN(const O3DynInstPtr &barr_inst); /** Pointer to the IQ. */ - InstructionQueue *iqPtr; + InstructionQueue *iqPtr; /** The thread id of this memory dependence unit. */ int id;