diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh index a13eec7c98..4a7dad86e3 100644 --- a/src/cpu/checker/cpu.hh +++ b/src/cpu/checker/cpu.hh @@ -51,6 +51,7 @@ #include "cpu/base.hh" #include "cpu/exec_context.hh" #include "cpu/inst_res.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/pc_event.hh" #include "cpu/simple_thread.hh" #include "cpu/static_inst.hh" @@ -559,12 +560,9 @@ class CheckerCPU : public BaseCPU, public ExecContext * template instantiations of the Checker must be placed at the bottom * of checker/cpu.cc. */ -template +template class Checker : public CheckerCPU { - private: - typedef typename Impl::DynInstPtr DynInstPtr; - public: Checker(const Params &p) : CheckerCPU(p), updateThisCycle(false), unverifiedInst(NULL) diff --git a/src/cpu/checker/cpu_impl.hh b/src/cpu/checker/cpu_impl.hh index b18bb8046a..de123d447f 100644 --- a/src/cpu/checker/cpu_impl.hh +++ b/src/cpu/checker/cpu_impl.hh @@ -59,9 +59,9 @@ #include "sim/sim_object.hh" #include "sim/stats.hh" -template +template void -Checker::advancePC(const Fault &fault) +Checker::advancePC(const Fault &fault) { if (fault != NoFault) { curMacroStaticInst = nullStaticInstPtr; @@ -80,9 +80,9 @@ Checker::advancePC(const Fault &fault) } ////////////////////////////////////////////////// -template +template void -Checker::handlePendingInt() +Checker::handlePendingInt() { DPRINTF(Checker, "IRQ detected at PC: %s with %d insts in buffer\n", thread->pcState(), instList.size()); @@ -114,9 +114,9 @@ Checker::handlePendingInt() curMacroStaticInst = nullStaticInstPtr; } -template +template void -Checker::verify(const DynInstPtr &completed_inst) +Checker::verify(const DynInstPtr &completed_inst) { DynInstPtr inst; @@ -428,22 +428,19 @@ Checker::verify(const DynInstPtr &completed_inst) unverifiedInst = NULL; } -template +template void -Checker::switchOut() +Checker::switchOut() { instList.clear(); } -template -void -Checker::takeOverFrom(BaseCPU *oldCPU) -{ -} +template +void 
Checker::takeOverFrom(BaseCPU *oldCPU) {} -template +template void -Checker::validateInst(const DynInstPtr &inst) +Checker::validateInst(const DynInstPtr &inst) { if (inst->instAddr() != thread->instAddr()) { warn("%lli: PCs do not match! Inst: %s, checker: %s", @@ -462,9 +459,9 @@ Checker::validateInst(const DynInstPtr &inst) } } -template +template void -Checker::validateExecution(const DynInstPtr &inst) +Checker::validateExecution(const DynInstPtr &inst) { InstResult checker_val; InstResult inst_val; @@ -555,9 +552,9 @@ Checker::validateExecution(const DynInstPtr &inst) // This function is weird, if it is called it means the Checker and // O3 have diverged, so panic is called for now. It may be useful // to resynch states and continue if the divergence is a false positive -template +template void -Checker::validateState() +Checker::validateState() { if (updateThisCycle) { // Change this back to warn if divergences end up being false positives @@ -580,10 +577,10 @@ Checker::validateState() } } -template +template void -Checker::copyResult(const DynInstPtr &inst, - const InstResult& mismatch_val, int start_idx) +Checker::copyResult( + const DynInstPtr &inst, const InstResult& mismatch_val, int start_idx) { // We've already popped one dest off the queue, // so do the fix-up then start with the next dest reg; @@ -657,9 +654,9 @@ Checker::copyResult(const DynInstPtr &inst, } } -template +template void -Checker::dumpAndExit(const DynInstPtr &inst) +Checker::dumpAndExit(const DynInstPtr &inst) { cprintf("Error detected, instruction information:\n"); cprintf("PC:%s, nextPC:%#x\n[sn:%lli]\n[tid:%i]\n" @@ -673,9 +670,9 @@ Checker::dumpAndExit(const DynInstPtr &inst) CheckerCPU::dumpAndExit(); } -template +template void -Checker::dumpInsts() +Checker::dumpInsts() { int num = 0; diff --git a/src/cpu/o3/checker.cc b/src/cpu/o3/checker.cc index ff498edabf..7461a29d3d 100644 --- a/src/cpu/o3/checker.cc +++ b/src/cpu/o3/checker.cc @@ -43,4 +43,4 @@ #include 
"cpu/checker/cpu_impl.hh" template -class Checker; +class Checker; diff --git a/src/cpu/o3/checker.hh b/src/cpu/o3/checker.hh index 0c7d6294ae..4a2fbbc851 100644 --- a/src/cpu/o3/checker.hh +++ b/src/cpu/o3/checker.hh @@ -48,10 +48,10 @@ /** * Specific non-templated derived class used for SimObject configuration. */ -class O3Checker : public Checker +class O3Checker : public Checker { public: - O3Checker(const Params &p) : Checker(p) + O3Checker(const Params &p) : Checker(p) { // The checker should check all instructions executed by the main // cpu and therefore any parameters for early exit don't make much diff --git a/src/cpu/o3/comm.hh b/src/cpu/o3/comm.hh index 39bf20ba5a..eb85e5e1e3 100644 --- a/src/cpu/o3/comm.hh +++ b/src/cpu/o3/comm.hh @@ -47,6 +47,7 @@ #include "arch/types.hh" #include "base/types.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/limits.hh" #include "sim/faults.hh" @@ -54,11 +55,9 @@ template struct DefaultFetchDefaultDecode { - typedef typename Impl::DynInstPtr DynInstPtr; - int size; - DynInstPtr insts[O3MaxWidth]; + O3DynInstPtr insts[O3MaxWidth]; Fault fetchFault; InstSeqNum fetchFaultSN; bool clearFetchFault; @@ -68,34 +67,28 @@ struct DefaultFetchDefaultDecode template struct DefaultDecodeDefaultRename { - typedef typename Impl::DynInstPtr DynInstPtr; - int size; - DynInstPtr insts[O3MaxWidth]; + O3DynInstPtr insts[O3MaxWidth]; }; /** Struct that defines the information passed from rename to IEW. */ template struct DefaultRenameDefaultIEW { - typedef typename Impl::DynInstPtr DynInstPtr; - int size; - DynInstPtr insts[O3MaxWidth]; + O3DynInstPtr insts[O3MaxWidth]; }; /** Struct that defines the information passed from IEW to commit. 
*/ template struct DefaultIEWDefaultCommit { - typedef typename Impl::DynInstPtr DynInstPtr; - int size; - DynInstPtr insts[O3MaxWidth]; - DynInstPtr mispredictInst[O3MaxThreads]; + O3DynInstPtr insts[O3MaxWidth]; + O3DynInstPtr mispredictInst[O3MaxThreads]; Addr mispredPC[O3MaxThreads]; InstSeqNum squashedSeqNum[O3MaxThreads]; TheISA::PCState pc[O3MaxThreads]; @@ -109,23 +102,20 @@ struct DefaultIEWDefaultCommit template struct IssueStruct { - typedef typename Impl::DynInstPtr DynInstPtr; - int size; - DynInstPtr insts[O3MaxWidth]; + O3DynInstPtr insts[O3MaxWidth]; }; /** Struct that defines all backwards communication. */ template struct TimeBufStruct { - typedef typename Impl::DynInstPtr DynInstPtr; - struct decodeComm + struct DecodeComm { TheISA::PCState nextPC; - DynInstPtr mispredictInst; - DynInstPtr squashInst; + O3DynInstPtr mispredictInst; + O3DynInstPtr squashInst; InstSeqNum doneSeqNum; Addr mispredPC; uint64_t branchAddr; @@ -136,15 +126,13 @@ struct TimeBufStruct bool branchTaken; }; - decodeComm decodeInfo[O3MaxThreads]; + DecodeComm decodeInfo[O3MaxThreads]; - struct renameComm - { - }; + struct RenameComm {}; - renameComm renameInfo[O3MaxThreads]; + RenameComm renameInfo[O3MaxThreads]; - struct iewComm + struct IewComm { // Also eventually include skid buffer space. 
unsigned freeIQEntries; @@ -161,9 +149,9 @@ struct TimeBufStruct bool usedLSQ; }; - iewComm iewInfo[O3MaxThreads]; + IewComm iewInfo[O3MaxThreads]; - struct commitComm + struct CommitComm { ///////////////////////////////////////////////////////////////////// // This code has been re-structured for better packing of variables @@ -184,14 +172,14 @@ struct TimeBufStruct /// Provide fetch the instruction that mispredicted, if this /// pointer is not-null a misprediction occured - DynInstPtr mispredictInst; // *F + O3DynInstPtr mispredictInst; // *F /// Instruction that caused the a non-mispredict squash - DynInstPtr squashInst; // *F + O3DynInstPtr squashInst; // *F /// Hack for now to send back a strictly ordered access to the /// IEW stage. - DynInstPtr strictlyOrderedLoad; // *I + O3DynInstPtr strictlyOrderedLoad; // *I /// Communication specifically to the IQ to tell the IQ that it can /// schedule a non-speculative instruction. @@ -227,7 +215,7 @@ struct TimeBufStruct }; - commitComm commitInfo[O3MaxThreads]; + CommitComm commitInfo[O3MaxThreads]; bool decodeBlock[O3MaxThreads]; bool decodeUnblock[O3MaxThreads]; diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh index 6b01359a0f..bf0b07ca29 100644 --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -46,6 +46,7 @@ #include "base/statistics.hh" #include "cpu/exetrace.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/iew.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/rename_map.hh" @@ -87,7 +88,6 @@ class DefaultCommit public: // Typedefs from the Impl. typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::TimeStruct TimeStruct; typedef typename Impl::FetchStruct FetchStruct; typedef typename Impl::IEWStruct IEWStruct; @@ -126,10 +126,10 @@ class DefaultCommit CommitPolicy commitPolicy; /** Probe Points. 
*/ - ProbePointArg *ppCommit; - ProbePointArg *ppCommitStall; + ProbePointArg *ppCommit; + ProbePointArg *ppCommitStall; /** To probe when an instruction is squashed */ - ProbePointArg *ppSquash; + ProbePointArg *ppSquash; /** Mark the thread as processing a trap. */ void processTrapEvent(ThreadID tid); @@ -277,7 +277,7 @@ class DefaultCommit * @param tid ID of the thread to squash. * @param head_inst Instruction that requested the squash. */ - void squashAfter(ThreadID tid, const DynInstPtr &head_inst); + void squashAfter(ThreadID tid, const O3DynInstPtr &head_inst); /** Handles processing an interrupt. */ void handleInterrupt(); @@ -291,7 +291,7 @@ class DefaultCommit /** Tries to commit the head ROB instruction passed in. * @param head_inst The instruction to be committed. */ - bool commitHead(const DynInstPtr &head_inst, unsigned inst_num); + bool commitHead(const O3DynInstPtr &head_inst, unsigned inst_num); /** Gets instructions from rename and inserts them into the ROB. */ void getInsts(); @@ -385,7 +385,7 @@ class DefaultCommit * that caused a squash since this needs to be passed to the fetch * stage once squashing starts. */ - DynInstPtr squashAfterInst[O3MaxThreads]; + O3DynInstPtr squashAfterInst[O3MaxThreads]; /** Priority List used for Commit Policy */ std::list priority_list; @@ -472,7 +472,7 @@ class DefaultCommit bool avoidQuiesceLiveLock; /** Updates commit stats based on this instruction. 
*/ - void updateComInstStats(const DynInstPtr &inst); + void updateComInstStats(const O3DynInstPtr &inst); // HTM int htmStarts[O3MaxThreads]; diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh index 2c692ea165..06694bf257 100644 --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -54,6 +54,7 @@ #include "cpu/exetrace.hh" #include "cpu/null_static_inst.hh" #include "cpu/o3/commit.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/thread_state.hh" #include "cpu/timebuf.hh" @@ -140,9 +141,12 @@ template void DefaultCommit::regProbePoints() { - ppCommit = new ProbePointArg(cpu->getProbeManager(), "Commit"); - ppCommitStall = new ProbePointArg(cpu->getProbeManager(), "CommitStall"); - ppSquash = new ProbePointArg(cpu->getProbeManager(), "Squash"); + ppCommit = new ProbePointArg( + cpu->getProbeManager(), "Commit"); + ppCommitStall = new ProbePointArg( + cpu->getProbeManager(), "CommitStall"); + ppSquash = new ProbePointArg( + cpu->getProbeManager(), "Squash"); } template @@ -653,7 +657,7 @@ DefaultCommit::squashFromSquashAfter(ThreadID tid) template void -DefaultCommit::squashAfter(ThreadID tid, const DynInstPtr &head_inst) +DefaultCommit::squashAfter(ThreadID tid, const O3DynInstPtr &head_inst) { DPRINTF(Commit, "Executing squash after for [tid:%i] inst [sn:%llu]\n", tid, head_inst->seqNum); @@ -713,14 +717,14 @@ DefaultCommit::tick() // will be active. 
_nextStatus = Active; - GEM5_VAR_USED const DynInstPtr &inst = rob->readHeadInst(tid); + GEM5_VAR_USED const O3DynInstPtr &inst = rob->readHeadInst(tid); DPRINTF(Commit,"[tid:%i] Instruction [sn:%llu] PC %s is head of" " ROB and ready to commit\n", tid, inst->seqNum, inst->pcState()); } else if (!rob->isEmpty(tid)) { - const DynInstPtr &inst = rob->readHeadInst(tid); + const O3DynInstPtr &inst = rob->readHeadInst(tid); ppCommitStall->notify(inst); @@ -1001,7 +1005,7 @@ DefaultCommit::commitInsts() unsigned num_committed = 0; - DynInstPtr head_inst; + O3DynInstPtr head_inst; // Commit as many instructions as possible until the commit bandwidth // limit is reached, or it becomes impossible to commit any more. @@ -1192,7 +1196,8 @@ DefaultCommit::commitInsts() template bool -DefaultCommit::commitHead(const DynInstPtr &head_inst, unsigned inst_num) +DefaultCommit::commitHead( + const O3DynInstPtr &head_inst, unsigned inst_num) { assert(head_inst); @@ -1391,7 +1396,7 @@ DefaultCommit::getInsts() int insts_to_process = std::min((int)renameWidth, fromRename->size); for (int inst_num = 0; inst_num < insts_to_process; ++inst_num) { - const DynInstPtr &inst = fromRename->insts[inst_num]; + const O3DynInstPtr &inst = fromRename->insts[inst_num]; ThreadID tid = inst->threadNumber; if (!inst->isSquashed() && @@ -1438,7 +1443,7 @@ DefaultCommit::markCompletedInsts() template void -DefaultCommit::updateComInstStats(const DynInstPtr &inst) +DefaultCommit::updateComInstStats(const O3DynInstPtr &inst) { ThreadID tid = inst->threadNumber; @@ -1583,7 +1588,7 @@ DefaultCommit::oldestReady() if (rob->isHeadReady(tid)) { - const DynInstPtr &head_inst = rob->readHeadInst(tid); + const O3DynInstPtr &head_inst = rob->readHeadInst(tid); if (first) { oldest = tid; diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc index ed3d5f1d95..1ed725b8ee 100644 --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -136,7 +136,7 @@ FullO3CPU::FullO3CPU(const DerivO3CPUParams ¶ms) if (params.checker) { 
BaseCPU *temp_checker = params.checker; - checker = dynamic_cast *>(temp_checker); + checker = dynamic_cast *>(temp_checker); checker->setIcachePort(&this->fetch.getInstPort()); checker->setSystem(params.system); } else { @@ -378,8 +378,11 @@ FullO3CPU::regProbePoints() { BaseCPU::regProbePoints(); - ppInstAccessComplete = new ProbePointArg(getProbeManager(), "InstAccessComplete"); - ppDataAccessComplete = new ProbePointArg >(getProbeManager(), "DataAccessComplete"); + ppInstAccessComplete = new ProbePointArg( + getProbeManager(), "InstAccessComplete"); + ppDataAccessComplete = new ProbePointArg< + std::pair>( + getProbeManager(), "DataAccessComplete"); fetch.regProbePoints(); rename.regProbePoints(); @@ -1501,7 +1504,7 @@ FullO3CPU::squashFromTC(ThreadID tid) template typename FullO3CPU::ListIt -FullO3CPU::addInst(const DynInstPtr &inst) +FullO3CPU::addInst(const O3DynInstPtr &inst) { instList.push_back(inst); @@ -1510,7 +1513,7 @@ FullO3CPU::addInst(const DynInstPtr &inst) template void -FullO3CPU::instDone(ThreadID tid, const DynInstPtr &inst) +FullO3CPU::instDone(ThreadID tid, const O3DynInstPtr &inst) { // Keep an instruction count. 
if (!inst->isMicroop() || inst->isLastMicroop()) { @@ -1530,7 +1533,7 @@ FullO3CPU::instDone(ThreadID tid, const DynInstPtr &inst) template void -FullO3CPU::removeFrontInst(const DynInstPtr &inst) +FullO3CPU::removeFrontInst(const O3DynInstPtr &inst) { DPRINTF(O3CPU, "Removing committed instruction [tid:%i] PC %s " "[sn:%lli]\n", @@ -1686,7 +1689,7 @@ FullO3CPU::dumpInsts() /* template void -FullO3CPU::wakeDependents(const DynInstPtr &inst) +FullO3CPU::wakeDependents(const O3DynInstPtr &inst) { iew.wakeDependents(inst); } diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh index 196f57d124..fbf402e9d9 100644 --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -56,6 +56,7 @@ #include "cpu/o3/comm.hh" #include "cpu/o3/commit.hh" #include "cpu/o3/decode.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/fetch.hh" #include "cpu/o3/free_list.hh" #include "cpu/o3/iew.hh" @@ -100,13 +101,12 @@ class FullO3CPU : public BaseO3CPU { public: // Typedefs from the Impl here. - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::O3CPU O3CPU; typedef O3ThreadState ImplState; typedef O3ThreadState Thread; - typedef typename std::list::iterator ListIt; + typedef typename std::list::iterator ListIt; friend class O3ThreadContext; @@ -184,7 +184,7 @@ class FullO3CPU : public BaseO3CPU ~FullO3CPU(); ProbePointArg *ppInstAccessComplete; - ProbePointArg > *ppDataAccessComplete; + ProbePointArg > *ppDataAccessComplete; /** Register probe points. */ void regProbePoints() override; @@ -439,15 +439,15 @@ class FullO3CPU : public BaseO3CPU /** Function to add instruction onto the head of the list of the * instructions. Used when new instructions are fetched. */ - ListIt addInst(const DynInstPtr &inst); + ListIt addInst(const O3DynInstPtr &inst); /** Function to tell the CPU that an instruction has completed. 
*/ - void instDone(ThreadID tid, const DynInstPtr &inst); + void instDone(ThreadID tid, const O3DynInstPtr &inst); /** Remove an instruction from the front end of the list. There's * no restriction on location of the instruction. */ - void removeFrontInst(const DynInstPtr &inst); + void removeFrontInst(const O3DynInstPtr &inst); /** Remove all instructions that are not currently in the ROB. * There's also an option to not squash delay slot instructions.*/ @@ -472,7 +472,7 @@ class FullO3CPU : public BaseO3CPU #endif /** List of all the instructions in flight. */ - std::list instList; + std::list instList; /** List of all the instructions that will be removed at the end of this * cycle. @@ -624,7 +624,7 @@ class FullO3CPU : public BaseO3CPU * instruction results at run time. This can be set to NULL if it * is not being used. */ - Checker *checker; + Checker *checker; /** Pointer to the system. */ System *system; @@ -648,7 +648,7 @@ class FullO3CPU : public BaseO3CPU std::vector tids; /** CPU pushRequest function, forwards request to LSQ. */ - Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, + Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op = nullptr, const std::vector& byte_enable = diff --git a/src/cpu/o3/decode.hh b/src/cpu/o3/decode.hh index c694e3c380..38ba0a6cd3 100644 --- a/src/cpu/o3/decode.hh +++ b/src/cpu/o3/decode.hh @@ -44,6 +44,7 @@ #include #include "base/statistics.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/limits.hh" #include "cpu/timebuf.hh" @@ -62,7 +63,6 @@ class DefaultDecode private: // Typedefs from the Impl. 
typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::FetchStruct FetchStruct; typedef typename Impl::DecodeStruct DecodeStruct; typedef typename Impl::TimeStruct TimeStruct; @@ -193,7 +193,7 @@ class DefaultDecode /** Squashes if there is a PC-relative branch that was predicted * incorrectly. Sends squash information back to fetch. */ - void squash(const DynInstPtr &inst, ThreadID tid); + void squash(const O3DynInstPtr &inst, ThreadID tid); public: /** Squashes due to commit signalling a squash. Changes status to @@ -235,10 +235,10 @@ class DefaultDecode typename TimeBuffer::wire fromFetch; /** Queue of all instructions coming from fetch this cycle. */ - std::queue insts[O3MaxThreads]; + std::queue insts[O3MaxThreads]; /** Skid buffer between fetch and decode. */ - std::queue skidBuffer[O3MaxThreads]; + std::queue skidBuffer[O3MaxThreads]; /** Variable that tracks if decode has written to the time buffer this * cycle. Used to tell CPU if there is activity this cycle. @@ -285,7 +285,7 @@ class DefaultDecode Addr bdelayDoneSeqNum[O3MaxThreads]; /** Instruction used for squashing branch (used for MIPS)*/ - DynInstPtr squashInst[O3MaxThreads]; + O3DynInstPtr squashInst[O3MaxThreads]; /** Tells when their is a pending delay slot inst. to send * to rename. 
If there is, then wait squash after the next diff --git a/src/cpu/o3/decode_impl.hh b/src/cpu/o3/decode_impl.hh index 5a78bac328..bfbf98f67b 100644 --- a/src/cpu/o3/decode_impl.hh +++ b/src/cpu/o3/decode_impl.hh @@ -46,6 +46,7 @@ #include "config/the_isa.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/decode.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/o3/limits.hh" #include "debug/Activity.hh" #include "debug/Decode.hh" @@ -293,7 +294,7 @@ DefaultDecode::unblock(ThreadID tid) template void -DefaultDecode::squash(const DynInstPtr &inst, ThreadID tid) +DefaultDecode::squash(const O3DynInstPtr &inst, ThreadID tid) { DPRINTF(Decode, "[tid:%i] [sn:%llu] Squashing due to incorrect branch " "prediction detected at decode.\n", tid, inst->seqNum); @@ -395,7 +396,7 @@ template void DefaultDecode::skidInsert(ThreadID tid) { - DynInstPtr inst = NULL; + O3DynInstPtr inst = NULL; while (!insts[tid].empty()) { inst = insts[tid].front(); @@ -655,7 +656,7 @@ DefaultDecode::decodeInsts(ThreadID tid) ++stats.runCycles; } - std::queue + std::queue &insts_to_decode = decodeStatus[tid] == Unblocking ? 
skidBuffer[tid] : insts[tid]; @@ -664,7 +665,7 @@ DefaultDecode::decodeInsts(ThreadID tid) while (insts_available > 0 && toRenameIndex < decodeWidth) { assert(!insts_to_decode.empty()); - DynInstPtr inst = std::move(insts_to_decode.front()); + O3DynInstPtr inst = std::move(insts_to_decode.front()); insts_to_decode.pop(); diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh index 0e9ad5daa8..0904800ed3 100644 --- a/src/cpu/o3/dyn_inst.hh +++ b/src/cpu/o3/dyn_inst.hh @@ -57,6 +57,7 @@ #include "cpu/inst_res.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/cpu.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/isa_specific.hh" #include "cpu/o3/lsq_unit.hh" #include "cpu/op_class.hh" @@ -67,10 +68,6 @@ class Packet; -class BaseO3DynInst; - -using O3DynInstPtr = RefCountingPtr; - class BaseO3DynInst : public ExecContext, public RefCounted { public: diff --git a/src/cpu/o3/dyn_inst_ptr.hh b/src/cpu/o3/dyn_inst_ptr.hh new file mode 100644 index 0000000000..479d175c82 --- /dev/null +++ b/src/cpu/o3/dyn_inst_ptr.hh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2010, 2016 ARM Limited + * Copyright (c) 2013 Advanced Micro Devices, Inc. + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __CPU_O3_DYN_INST_PTR_HH__ +#define __CPU_O3_DYN_INST_PTR_HH__ + +#include "base/refcnt.hh" + +class BaseO3DynInst; + +using O3DynInstPtr = RefCountingPtr; +using O3DynInstConstPtr = RefCountingPtr; + +#endif // __CPU_O3_DYN_INST_PTR_HH__ diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index dd7b5a0af9..dee344b414 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -44,6 +44,7 @@ #include "arch/decoder.hh" #include "base/statistics.hh" #include "config/the_isa.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/limits.hh" #include "cpu/pc_event.hh" #include "cpu/pred/bpred_unit.hh" @@ -72,8 +73,6 @@ class DefaultFetch { public: /** Typedefs from Impl. */ - typedef typename Impl::DynInst DynInst; - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::O3CPU O3CPU; typedef typename Impl::FetchStruct FetchStruct; typedef typename Impl::TimeStruct TimeStruct; @@ -207,7 +206,7 @@ class DefaultFetch std::list priorityList; /** Probe points. */ - ProbePointArg *ppFetch; + ProbePointArg *ppFetch; /** To probe when a fetch request is successfully sent. */ ProbePointArg *ppFetchRequestSent; @@ -294,7 +293,7 @@ class DefaultFetch * @param next_NPC Used for ISAs which use delay slots. * @return Whether or not a branch was predicted as taken. */ - bool lookupAndUpdateNextPC(const DynInstPtr &inst, TheISA::PCState &pc); + bool lookupAndUpdateNextPC(const O3DynInstPtr &inst, TheISA::PCState &pc); /** * Fetches the cache line that contains the fetch PC. Returns any @@ -321,14 +320,14 @@ class DefaultFetch /** Squashes a specific thread and resets the PC. */ inline void doSquash(const TheISA::PCState &newPC, - const DynInstPtr squashInst, ThreadID tid); + const O3DynInstPtr squashInst, ThreadID tid); /** Squashes a specific thread and resets the PC. Also tells the CPU to * remove any instructions between fetch and decode * that should be sqaushed. 
*/ void squashFromDecode(const TheISA::PCState &newPC, - const DynInstPtr squashInst, + const O3DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid); /** Checks if a thread is stalled. */ @@ -344,7 +343,7 @@ class DefaultFetch * squash should be the commit stage. */ void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num, - DynInstPtr squashInst, ThreadID tid); + O3DynInstPtr squashInst, ThreadID tid); /** Ticks the fetch stage, processing all inputs signals and fetching * as many instructions as possible. @@ -375,9 +374,9 @@ class DefaultFetch RequestPort &getInstPort() { return icachePort; } private: - DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, - StaticInstPtr curMacroop, TheISA::PCState thisPC, - TheISA::PCState nextPC, bool trace); + O3DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst, + StaticInstPtr curMacroop, TheISA::PCState thisPC, + TheISA::PCState nextPC, bool trace); /** Returns the appropriate thread to fetch, given the fetch policy. */ ThreadID getFetchingThread(); @@ -505,7 +504,7 @@ class DefaultFetch unsigned fetchQueueSize; /** Queue of fetched instructions. Per-thread to prevent HoL blocking. */ - std::deque fetchQueue[O3MaxThreads]; + std::deque fetchQueue[O3MaxThreads]; /** Whether or not the fetch buffer data is valid. 
*/ bool fetchBufferValid[O3MaxThreads]; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh index b1ae2e5b4b..587ae1ae18 100644 --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -150,7 +150,7 @@ template void DefaultFetch::regProbePoints() { - ppFetch = new ProbePointArg(cpu->getProbeManager(), "Fetch"); + ppFetch = new ProbePointArg(cpu->getProbeManager(), "Fetch"); ppFetchRequestSent = new ProbePointArg(cpu->getProbeManager(), "FetchRequest"); @@ -526,7 +526,7 @@ DefaultFetch::deactivateThread(ThreadID tid) template bool DefaultFetch::lookupAndUpdateNextPC( - const DynInstPtr &inst, TheISA::PCState &nextPC) + const O3DynInstPtr &inst, TheISA::PCState &nextPC) { // Do branch prediction check here. // A bit of a misnomer...next_PC is actually the current PC until @@ -706,7 +706,7 @@ DefaultFetch::finishTranslation(const Fault &fault, DPRINTF(Fetch, "[tid:%i] Translation faulted, building noop.\n", tid); // We will use a nop in ordier to carry the fault. - DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr, + O3DynInstPtr instruction = buildInst(tid, nopStaticInstPtr, nullptr, fetchPC, fetchPC, false); instruction->setNotAnInst(); @@ -729,7 +729,7 @@ DefaultFetch::finishTranslation(const Fault &fault, template inline void DefaultFetch::doSquash(const TheISA::PCState &newPC, - const DynInstPtr squashInst, ThreadID tid) + const O3DynInstPtr squashInst, ThreadID tid) { DPRINTF(Fetch, "[tid:%i] Squashing, setting PC to: %s.\n", tid, newPC); @@ -781,7 +781,7 @@ DefaultFetch::doSquash(const TheISA::PCState &newPC, template void DefaultFetch::squashFromDecode(const TheISA::PCState &newPC, - const DynInstPtr squashInst, + const O3DynInstPtr squashInst, const InstSeqNum seq_num, ThreadID tid) { DPRINTF(Fetch, "[tid:%i] Squashing from decode.\n", tid); @@ -851,7 +851,7 @@ DefaultFetch::updateFetchStatus() template void DefaultFetch::squash(const TheISA::PCState &newPC, - const InstSeqNum seq_num, DynInstPtr squashInst, + const 
InstSeqNum seq_num, O3DynInstPtr squashInst, ThreadID tid) { DPRINTF(Fetch, "[tid:%i] Squash from commit.\n", tid); @@ -1070,7 +1070,7 @@ DefaultFetch::checkSignalsAndUpdate(ThreadID tid) } template -typename Impl::DynInstPtr +O3DynInstPtr DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst, StaticInstPtr curMacroop, TheISA::PCState thisPC, TheISA::PCState nextPC, bool trace) @@ -1079,8 +1079,8 @@ DefaultFetch::buildInst(ThreadID tid, StaticInstPtr staticInst, InstSeqNum seq = cpu->getAndIncrementInstSeq(); // Create a new DynInst from the instruction fetched. - DynInstPtr instruction = - new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); + O3DynInstPtr instruction = + new BaseO3DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu); instruction->setTid(tid); instruction->setThreadState(cpu->thread[tid]); @@ -1297,7 +1297,7 @@ DefaultFetch::fetch(bool &status_change) newMacro |= staticInst->isLastMicroop(); } - DynInstPtr instruction = + O3DynInstPtr instruction = buildInst(tid, staticInst, curMacroop, thisPC, nextPC, true); diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh index 687f745d14..4afee5bf76 100644 --- a/src/cpu/o3/iew.hh +++ b/src/cpu/o3/iew.hh @@ -46,6 +46,7 @@ #include "base/statistics.hh" #include "cpu/o3/comm.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/inst_queue.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/lsq.hh" @@ -81,7 +82,6 @@ class DefaultIEW { private: //Typedefs from Impl - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::O3CPU O3CPU; typedef typename Impl::TimeStruct TimeStruct; typedef typename Impl::IEWStruct IEWStruct; @@ -120,12 +120,12 @@ class DefaultIEW StageStatus wbStatus; /** Probe points. */ - ProbePointArg *ppMispredict; - ProbePointArg *ppDispatch; + ProbePointArg *ppMispredict; + ProbePointArg *ppDispatch; /** To probe when instruction execution begins. */ - ProbePointArg *ppExecute; + ProbePointArg *ppExecute; /** To probe when instruction execution is complete. 
*/ - ProbePointArg *ppToCommit; + ProbePointArg *ppToCommit; public: /** Constructs a DefaultIEW with the given parameters. */ @@ -171,24 +171,24 @@ class DefaultIEW void squash(ThreadID tid); /** Wakes all dependents of a completed instruction. */ - void wakeDependents(const DynInstPtr &inst); + void wakeDependents(const O3DynInstPtr &inst); /** Tells memory dependence unit that a memory instruction needs to be * rescheduled. It will re-execute once replayMemInst() is called. */ - void rescheduleMemInst(const DynInstPtr &inst); + void rescheduleMemInst(const O3DynInstPtr &inst); /** Re-executes all rescheduled memory instructions. */ - void replayMemInst(const DynInstPtr &inst); + void replayMemInst(const O3DynInstPtr &inst); /** Moves memory instruction onto the list of cache blocked instructions */ - void blockMemInst(const DynInstPtr &inst); + void blockMemInst(const O3DynInstPtr &inst); /** Notifies that the cache has become unblocked */ void cacheUnblocked(); /** Sends an instruction to commit through the time buffer. */ - void instToCommit(const DynInstPtr &inst); + void instToCommit(const O3DynInstPtr &inst); /** Inserts unused instructions of a thread into the skid buffer. */ void skidInsert(ThreadID tid); @@ -226,7 +226,7 @@ class DefaultIEW bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); } /** Check misprediction */ - void checkMisprediction(const DynInstPtr &inst); + void checkMisprediction(const O3DynInstPtr &inst); // hardware transactional memory // For debugging purposes, it is useful to keep track of the most recent @@ -242,12 +242,12 @@ class DefaultIEW /** Sends commit proper information for a squash due to a branch * mispredict. */ - void squashDueToBranch(const DynInstPtr &inst, ThreadID tid); + void squashDueToBranch(const O3DynInstPtr &inst, ThreadID tid); /** Sends commit proper information for a squash due to a memory order * violation. 
*/ - void squashDueToMemOrder(const DynInstPtr &inst, ThreadID tid); + void squashDueToMemOrder(const O3DynInstPtr &inst, ThreadID tid); /** Sets Dispatch to blocked, and signals back to other stages to block. */ void block(ThreadID tid); @@ -301,7 +301,7 @@ class DefaultIEW private: /** Updates execution stats based on the instruction. */ - void updateExeInstStats(const DynInstPtr &inst); + void updateExeInstStats(const O3DynInstPtr &inst); /** Pointer to main time buffer used for backwards communication. */ TimeBuffer *timeBuffer; @@ -337,10 +337,10 @@ class DefaultIEW typename TimeBuffer::wire toCommit; /** Queue of all instructions coming from rename this cycle. */ - std::queue insts[O3MaxThreads]; + std::queue insts[O3MaxThreads]; /** Skid buffer between rename and IEW. */ - std::queue skidBuffer[O3MaxThreads]; + std::queue skidBuffer[O3MaxThreads]; /** Scoreboard pointer. */ Scoreboard* scoreboard; diff --git a/src/cpu/o3/iew_impl.hh b/src/cpu/o3/iew_impl.hh index d8a539847b..7c6fe5a6a9 100644 --- a/src/cpu/o3/iew_impl.hh +++ b/src/cpu/o3/iew_impl.hh @@ -50,6 +50,7 @@ #include "config/the_isa.hh" #include "cpu/checker/cpu.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/o3/fu_pool.hh" #include "cpu/o3/iew.hh" #include "cpu/o3/limits.hh" @@ -122,20 +123,22 @@ template void DefaultIEW::regProbePoints() { - ppDispatch = new ProbePointArg(cpu->getProbeManager(), "Dispatch"); - ppMispredict = new ProbePointArg(cpu->getProbeManager(), "Mispredict"); + ppDispatch = new ProbePointArg( + cpu->getProbeManager(), "Dispatch"); + ppMispredict = new ProbePointArg( + cpu->getProbeManager(), "Mispredict"); /** * Probe point with dynamic instruction as the argument used to probe when * an instruction starts to execute. 
*/ - ppExecute = new ProbePointArg(cpu->getProbeManager(), - "Execute"); + ppExecute = new ProbePointArg( + cpu->getProbeManager(), "Execute"); /** * Probe point with dynamic instruction as the argument used to probe when * an instruction execution completes and it is marked ready to commit. */ - ppToCommit = new ProbePointArg(cpu->getProbeManager(), - "ToCommit"); + ppToCommit = new ProbePointArg( + cpu->getProbeManager(), "ToCommit"); } template @@ -461,7 +464,7 @@ DefaultIEW::squash(ThreadID tid) template void -DefaultIEW::squashDueToBranch(const DynInstPtr& inst, ThreadID tid) +DefaultIEW::squashDueToBranch(const O3DynInstPtr& inst, ThreadID tid) { DPRINTF(IEW, "[tid:%i] [sn:%llu] Squashing from a specific instruction," " PC: %s " @@ -487,7 +490,7 @@ DefaultIEW::squashDueToBranch(const DynInstPtr& inst, ThreadID tid) template void -DefaultIEW::squashDueToMemOrder(const DynInstPtr& inst, ThreadID tid) +DefaultIEW::squashDueToMemOrder(const O3DynInstPtr& inst, ThreadID tid) { DPRINTF(IEW, "[tid:%i] Memory violation, squashing violator and younger " "insts, PC: %s [sn:%llu].\n", tid, inst->pcState(), inst->seqNum); @@ -550,28 +553,28 @@ DefaultIEW::unblock(ThreadID tid) template void -DefaultIEW::wakeDependents(const DynInstPtr& inst) +DefaultIEW::wakeDependents(const O3DynInstPtr& inst) { instQueue.wakeDependents(inst); } template void -DefaultIEW::rescheduleMemInst(const DynInstPtr& inst) +DefaultIEW::rescheduleMemInst(const O3DynInstPtr& inst) { instQueue.rescheduleMemInst(inst); } template void -DefaultIEW::replayMemInst(const DynInstPtr& inst) +DefaultIEW::replayMemInst(const O3DynInstPtr& inst) { instQueue.replayMemInst(inst); } template void -DefaultIEW::blockMemInst(const DynInstPtr& inst) +DefaultIEW::blockMemInst(const O3DynInstPtr& inst) { instQueue.blockMemInst(inst); } @@ -585,7 +588,7 @@ DefaultIEW::cacheUnblocked() template void -DefaultIEW::instToCommit(const DynInstPtr& inst) +DefaultIEW::instToCommit(const O3DynInstPtr& inst) { // This function 
should not be called after writebackInsts in a // single cycle. That will cause problems with an instruction @@ -630,7 +633,7 @@ template void DefaultIEW::skidInsert(ThreadID tid) { - DynInstPtr inst = NULL; + O3DynInstPtr inst = NULL; while (!insts[tid].empty()) { inst = insts[tid].front(); @@ -927,13 +930,13 @@ DefaultIEW::dispatchInsts(ThreadID tid) { // Obtain instructions from skid buffer if unblocking, or queue from rename // otherwise. - std::queue &insts_to_dispatch = + std::queue &insts_to_dispatch = dispatchStatus[tid] == Unblocking ? skidBuffer[tid] : insts[tid]; int insts_to_add = insts_to_dispatch.size(); - DynInstPtr inst; + O3DynInstPtr inst; bool add_to_iq = false; int dis_num_inst = 0; @@ -1208,7 +1211,7 @@ DefaultIEW::executeInsts() DPRINTF(IEW, "Execute: Executing instructions from IQ.\n"); - DynInstPtr inst = instQueue.getInstToExecute(); + O3DynInstPtr inst = instQueue.getInstToExecute(); DPRINTF(IEW, "Execute: Processing PC %s, [tid:%i] [sn:%llu].\n", inst->pcState(), inst->threadNumber,inst->seqNum); @@ -1372,7 +1375,7 @@ DefaultIEW::executeInsts() // If there was an ordering violation, then get the // DynInst that caused the violation. Note that this // clears the violation signal. - DynInstPtr violator; + O3DynInstPtr violator; violator = ldstQueue.getMemDepViolator(tid); DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s " @@ -1396,7 +1399,7 @@ DefaultIEW::executeInsts() if (ldstQueue.violation(tid)) { assert(inst->isMemRef()); - DynInstPtr violator = ldstQueue.getMemDepViolator(tid); + O3DynInstPtr violator = ldstQueue.getMemDepViolator(tid); DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: " "%s, inst PC: %s. Addr is: %#x.\n", @@ -1439,7 +1442,7 @@ DefaultIEW::writebackInsts() // as part of backwards communication. 
for (int inst_num = 0; inst_num < wbWidth && toCommit->insts[inst_num]; inst_num++) { - DynInstPtr inst = toCommit->insts[inst_num]; + O3DynInstPtr inst = toCommit->insts[inst_num]; ThreadID tid = inst->threadNumber; DPRINTF(IEW, "Sending instructions to commit, [sn:%lli] PC %s.\n", @@ -1610,7 +1613,7 @@ DefaultIEW::tick() template void -DefaultIEW::updateExeInstStats(const DynInstPtr& inst) +DefaultIEW::updateExeInstStats(const O3DynInstPtr& inst) { ThreadID tid = inst->threadNumber; @@ -1642,7 +1645,7 @@ DefaultIEW::updateExeInstStats(const DynInstPtr& inst) template void -DefaultIEW::checkMisprediction(const DynInstPtr& inst) +DefaultIEW::checkMisprediction(const O3DynInstPtr& inst) { ThreadID tid = inst->threadNumber; diff --git a/src/cpu/o3/impl.hh b/src/cpu/o3/impl.hh index c61367f46e..2c7242ea60 100644 --- a/src/cpu/o3/impl.hh +++ b/src/cpu/o3/impl.hh @@ -32,8 +32,6 @@ #include "cpu/o3/comm.hh" // Forward declarations. -class BaseO3DynInst; - template class FullO3CPU; @@ -66,15 +64,6 @@ struct O3CPUImpl typedef TimeBufStruct TimeStruct; - /** The DynInst type to be used. */ - typedef BaseO3DynInst DynInst; - - /** The refcounted DynInst pointer to be used. In most cases this is - * what should be used, and not DynInst *. - */ - typedef RefCountingPtr DynInstPtr; - typedef RefCountingPtr DynInstConstPtr; - /** The O3CPU type to be used. */ typedef FullO3CPU O3CPU; diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index 2b79e9cb77..6c85ffd87e 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -51,6 +51,7 @@ #include "base/types.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/dep_graph.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/mem_dep_unit.hh" #include "cpu/o3/store_set.hh" @@ -89,19 +90,18 @@ class InstructionQueue public: //Typedefs from the Impl. 
typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::IssueStruct IssueStruct; typedef typename Impl::TimeStruct TimeStruct; // Typedef of iterator through the list of instructions. - typedef typename std::list::iterator ListIt; + typedef typename std::list::iterator ListIt; /** FU completion event class. */ class FUCompletion : public Event { private: /** Executing instruction. */ - DynInstPtr inst; + O3DynInstPtr inst; /** Index of the FU used for executing. */ int fuIdx; @@ -116,7 +116,7 @@ class InstructionQueue public: /** Construct a FU completion event. */ - FUCompletion(const DynInstPtr &_inst, int fu_idx, + FUCompletion(const O3DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr); virtual void process(); @@ -177,40 +177,43 @@ class InstructionQueue bool hasReadyInsts(); /** Inserts a new instruction into the IQ. */ - void insert(const DynInstPtr &new_inst); + void insert(const O3DynInstPtr &new_inst); /** Inserts a new, non-speculative instruction into the IQ. */ - void insertNonSpec(const DynInstPtr &new_inst); + void insertNonSpec(const O3DynInstPtr &new_inst); /** Inserts a memory or write barrier into the IQ to make sure * loads and stores are ordered properly. */ - void insertBarrier(const DynInstPtr &barr_inst); + void insertBarrier(const O3DynInstPtr &barr_inst); /** Returns the oldest scheduled instruction, and removes it from * the list of instructions waiting to execute. */ - DynInstPtr getInstToExecute(); + O3DynInstPtr getInstToExecute(); /** Gets a memory instruction that was referred due to a delayed DTB * translation if it is now ready to execute. NULL if none available. */ - DynInstPtr getDeferredMemInstToExecute(); + O3DynInstPtr getDeferredMemInstToExecute(); /** Gets a memory instruction that was blocked on the cache. NULL if none * available. 
*/ - DynInstPtr getBlockedMemInstToExecute(); + O3DynInstPtr getBlockedMemInstToExecute(); /** * Records the instruction as the producer of a register without * adding it to the rest of the IQ. */ - void recordProducer(const DynInstPtr &inst) - { addToProducers(inst); } + void + recordProducer(const O3DynInstPtr &inst) + { + addToProducers(inst); + } /** Process FU completion event. */ - void processFUCompletion(const DynInstPtr &inst, int fu_idx); + void processFUCompletion(const O3DynInstPtr &inst, int fu_idx); /** * Schedules ready instructions, adding the ready ones (oldest first) to @@ -228,34 +231,35 @@ class InstructionQueue void commit(const InstSeqNum &inst, ThreadID tid = 0); /** Wakes all dependents of a completed instruction. */ - int wakeDependents(const DynInstPtr &completed_inst); + int wakeDependents(const O3DynInstPtr &completed_inst); /** Adds a ready memory instruction to the ready list. */ - void addReadyMemInst(const DynInstPtr &ready_inst); + void addReadyMemInst(const O3DynInstPtr &ready_inst); /** * Reschedules a memory instruction. It will be ready to issue once * replayMemInst() is called. */ - void rescheduleMemInst(const DynInstPtr &resched_inst); + void rescheduleMemInst(const O3DynInstPtr &resched_inst); /** Replays a memory instruction. It must be rescheduled first. */ - void replayMemInst(const DynInstPtr &replay_inst); + void replayMemInst(const O3DynInstPtr &replay_inst); /** * Defers a memory instruction when its DTB translation incurs a hw * page table walk. */ - void deferMemInst(const DynInstPtr &deferred_inst); + void deferMemInst(const O3DynInstPtr &deferred_inst); /** Defers a memory instruction when it is cache blocked. */ - void blockMemInst(const DynInstPtr &blocked_inst); + void blockMemInst(const O3DynInstPtr &blocked_inst); /** Notify instruction queue that a previous blockage has resolved */ void cacheUnblocked(); /** Indicates an ordering violation between a store and a load. 
*/ - void violation(const DynInstPtr &store, const DynInstPtr &faulting_load); + void violation(const O3DynInstPtr &store, + const O3DynInstPtr &faulting_load); /** * Squashes instructions for a thread. Squashing information is obtained @@ -310,23 +314,23 @@ class InstructionQueue ////////////////////////////////////// /** List of all the instructions in the IQ (some of which may be issued). */ - std::list instList[O3MaxThreads]; + std::list instList[O3MaxThreads]; /** List of instructions that are ready to be executed. */ - std::list instsToExecute; + std::list instsToExecute; /** List of instructions waiting for their DTB translation to * complete (hw page table walk in progress). */ - std::list deferredMemInsts; + std::list deferredMemInsts; /** List of instructions that have been cache blocked. */ - std::list blockedMemInsts; + std::list blockedMemInsts; /** List of instructions that were cache blocked, but a retry has been seen * since, so they can now be retried. May fail again go on the blocked list. */ - std::list retryMemInsts; + std::list retryMemInsts; /** * Struct for comparing entries to be added to the priority queue. @@ -335,16 +339,14 @@ class InstructionQueue * numbers (and hence are older) will be at the top of the * priority queue. */ - struct pqCompare + struct PqCompare { - bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const - { - return lhs->seqNum > rhs->seqNum; - } + bool operator()(const O3DynInstPtr &lhs, + const O3DynInstPtr &rhs) const; }; - typedef std::priority_queue, pqCompare> - ReadyInstQueue; + typedef std::priority_queue< + O3DynInstPtr, std::vector, PqCompare> ReadyInstQueue; /** List of ready instructions, per op class. They are separated by op * class to allow for easy mapping to FUs. @@ -358,9 +360,9 @@ class InstructionQueue * the sequence number will be available. Thus it is most efficient to be * able to search by the sequence number alone. 
*/ - std::map nonSpecInsts; + std::map nonSpecInsts; - typedef typename std::map::iterator NonSpecMapIt; + typedef typename std::map::iterator NonSpecMapIt; /** Entry for the list age ordering by op class. */ struct ListOrderEntry @@ -397,7 +399,7 @@ class InstructionQueue */ void moveToYoungerInst(ListOrderIt age_order_it); - DependencyGraph dependGraph; + DependencyGraph dependGraph; ////////////////////////////////////// // Various parameters @@ -450,13 +452,13 @@ class InstructionQueue std::vector regScoreboard; /** Adds an instruction to the dependency graph, as a consumer. */ - bool addToDependents(const DynInstPtr &new_inst); + bool addToDependents(const O3DynInstPtr &new_inst); /** Adds an instruction to the dependency graph, as a producer. */ - void addToProducers(const DynInstPtr &new_inst); + void addToProducers(const O3DynInstPtr &new_inst); /** Moves an instruction to the ready queue if it is ready. */ - void addIfReady(const DynInstPtr &inst); + void addIfReady(const O3DynInstPtr &inst); /** Debugging function to count how many entries are in the IQ. 
It does * a linear walk through the instructions, so do not call this function diff --git a/src/cpu/o3/inst_queue_impl.hh b/src/cpu/o3/inst_queue_impl.hh index d4328148b2..9373f6b879 100644 --- a/src/cpu/o3/inst_queue_impl.hh +++ b/src/cpu/o3/inst_queue_impl.hh @@ -46,6 +46,7 @@ #include #include "base/logging.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/o3/fu_pool.hh" #include "cpu/o3/inst_queue.hh" #include "cpu/o3/limits.hh" @@ -59,7 +60,7 @@ using std::list; template -InstructionQueue::FUCompletion::FUCompletion(const DynInstPtr &_inst, +InstructionQueue::FUCompletion::FUCompletion(const O3DynInstPtr &_inst, int fu_idx, InstructionQueue *iq_ptr) : Event(Stat_Event_Pri, AutoDelete), inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), freeFU(false) @@ -576,7 +577,7 @@ InstructionQueue::hasReadyInsts() template void -InstructionQueue::insert(const DynInstPtr &new_inst) +InstructionQueue::insert(const O3DynInstPtr &new_inst) { if (new_inst->isFloating()) { iqIOStats.fpInstQueueWrites++; @@ -622,7 +623,7 @@ InstructionQueue::insert(const DynInstPtr &new_inst) template void -InstructionQueue::insertNonSpec(const DynInstPtr &new_inst) +InstructionQueue::insertNonSpec(const O3DynInstPtr &new_inst) { // @todo: Clean up this code; can do it by setting inst as unable // to issue, then calling normal insert on the inst. 
@@ -669,7 +670,7 @@ InstructionQueue::insertNonSpec(const DynInstPtr &new_inst) template void -InstructionQueue::insertBarrier(const DynInstPtr &barr_inst) +InstructionQueue::insertBarrier(const O3DynInstPtr &barr_inst) { memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst); @@ -677,11 +678,11 @@ InstructionQueue::insertBarrier(const DynInstPtr &barr_inst) } template -typename Impl::DynInstPtr +O3DynInstPtr InstructionQueue::getInstToExecute() { assert(!instsToExecute.empty()); - DynInstPtr inst = std::move(instsToExecute.front()); + O3DynInstPtr inst = std::move(instsToExecute.front()); instsToExecute.pop_front(); if (inst->isFloating()) { iqIOStats.fpInstQueueReads++; @@ -748,7 +749,8 @@ InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it) template void -InstructionQueue::processFUCompletion(const DynInstPtr &inst, int fu_idx) +InstructionQueue::processFUCompletion( + const O3DynInstPtr &inst, int fu_idx) { DPRINTF(IQ, "Processing FU completion [sn:%llu]\n", inst->seqNum); assert(!cpu->switchedOut()); @@ -779,7 +781,7 @@ InstructionQueue::scheduleReadyInsts() IssueStruct *i2e_info = issueToExecuteQueue->access(0); - DynInstPtr mem_inst; + O3DynInstPtr mem_inst; while ((mem_inst = std::move(getDeferredMemInstToExecute()))) { addReadyMemInst(mem_inst); } @@ -806,7 +808,7 @@ InstructionQueue::scheduleReadyInsts() assert(!readyInsts[op_class].empty()); - DynInstPtr issuing_inst = readyInsts[op_class].top(); + O3DynInstPtr issuing_inst = readyInsts[op_class].top(); if (issuing_inst->isFloating()) { iqIOStats.fpInstQueueReads++; @@ -986,7 +988,7 @@ InstructionQueue::commit(const InstSeqNum &inst, ThreadID tid) template int -InstructionQueue::wakeDependents(const DynInstPtr &completed_inst) +InstructionQueue::wakeDependents(const O3DynInstPtr &completed_inst) { int dependents = 0; @@ -1054,7 +1056,7 @@ InstructionQueue::wakeDependents(const DynInstPtr &completed_inst) //Go through the dependency chain, marking the registers as //ready within the 
waiting instructions. - DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex()); + O3DynInstPtr dep_inst = dependGraph.pop(dest_reg->flatIndex()); while (dep_inst) { DPRINTF(IQ, "Waking up a dependent instruction, [sn:%llu] " @@ -1086,7 +1088,7 @@ InstructionQueue::wakeDependents(const DynInstPtr &completed_inst) template void -InstructionQueue::addReadyMemInst(const DynInstPtr &ready_inst) +InstructionQueue::addReadyMemInst(const O3DynInstPtr &ready_inst) { OpClass op_class = ready_inst->opClass(); @@ -1109,7 +1111,7 @@ InstructionQueue::addReadyMemInst(const DynInstPtr &ready_inst) template void -InstructionQueue::rescheduleMemInst(const DynInstPtr &resched_inst) +InstructionQueue::rescheduleMemInst(const O3DynInstPtr &resched_inst) { DPRINTF(IQ, "Rescheduling mem inst [sn:%llu]\n", resched_inst->seqNum); @@ -1123,21 +1125,21 @@ InstructionQueue::rescheduleMemInst(const DynInstPtr &resched_inst) template void -InstructionQueue::replayMemInst(const DynInstPtr &replay_inst) +InstructionQueue::replayMemInst(const O3DynInstPtr &replay_inst) { memDepUnit[replay_inst->threadNumber].replay(); } template void -InstructionQueue::deferMemInst(const DynInstPtr &deferred_inst) +InstructionQueue::deferMemInst(const O3DynInstPtr &deferred_inst) { deferredMemInsts.push_back(deferred_inst); } template void -InstructionQueue::blockMemInst(const DynInstPtr &blocked_inst) +InstructionQueue::blockMemInst(const O3DynInstPtr &blocked_inst) { blocked_inst->clearIssued(); blocked_inst->clearCanIssue(); @@ -1154,13 +1156,13 @@ InstructionQueue::cacheUnblocked() } template -typename Impl::DynInstPtr +O3DynInstPtr InstructionQueue::getDeferredMemInstToExecute() { for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); ++it) { if ((*it)->translationCompleted() || (*it)->isSquashed()) { - DynInstPtr mem_inst = std::move(*it); + O3DynInstPtr mem_inst = std::move(*it); deferredMemInsts.erase(it); return mem_inst; } @@ -1169,13 +1171,13 @@ 
InstructionQueue::getDeferredMemInstToExecute() } template -typename Impl::DynInstPtr +O3DynInstPtr InstructionQueue::getBlockedMemInstToExecute() { if (retryMemInsts.empty()) { return nullptr; } else { - DynInstPtr mem_inst = std::move(retryMemInsts.front()); + O3DynInstPtr mem_inst = std::move(retryMemInsts.front()); retryMemInsts.pop_front(); return mem_inst; } @@ -1183,8 +1185,8 @@ InstructionQueue::getBlockedMemInstToExecute() template void -InstructionQueue::violation(const DynInstPtr &store, - const DynInstPtr &faulting_load) +InstructionQueue::violation(const O3DynInstPtr &store, + const O3DynInstPtr &faulting_load) { iqIOStats.intInstQueueWrites++; memDepUnit[store->threadNumber].violation(store, faulting_load); @@ -1223,7 +1225,7 @@ InstructionQueue::doSquash(ThreadID tid) while (squash_it != instList[tid].end() && (*squash_it)->seqNum > squashedSeqNum[tid]) { - DynInstPtr squashed_inst = (*squash_it); + O3DynInstPtr squashed_inst = (*squash_it); if (squashed_inst->isFloating()) { iqIOStats.fpInstQueueWrites++; } else if (squashed_inst->isVector()) { @@ -1329,7 +1331,7 @@ InstructionQueue::doSquash(ThreadID tid) // IQ clears out the heads of the dependency graph only when // instructions reach writeback stage. If an instruction is squashed // before writeback stage, its head of dependency graph would not be - // cleared out; it holds the instruction's DynInstPtr. This prevents + // cleared out; it holds the instruction's O3DynInstPtr. This prevents // freeing the squashed instruction's DynInst. // Thus, we need to manually clear out the squashed instructions' heads // of dependency graph. 
@@ -1352,7 +1354,15 @@ InstructionQueue::doSquash(ThreadID tid) template bool -InstructionQueue::addToDependents(const DynInstPtr &new_inst) +InstructionQueue::PqCompare::operator()( + const O3DynInstPtr &lhs, const O3DynInstPtr &rhs) const +{ + return lhs->seqNum > rhs->seqNum; +} + +template +bool +InstructionQueue::addToDependents(const O3DynInstPtr &new_inst) { // Loop through the instruction's source registers, adding // them to the dependency list if they are not ready. @@ -1400,7 +1410,7 @@ InstructionQueue::addToDependents(const DynInstPtr &new_inst) template void -InstructionQueue::addToProducers(const DynInstPtr &new_inst) +InstructionQueue::addToProducers(const O3DynInstPtr &new_inst) { // Nothing really needs to be marked when an instruction becomes // the producer of a register's value, but for convenience a ptr @@ -1436,7 +1446,7 @@ InstructionQueue::addToProducers(const DynInstPtr &new_inst) template void -InstructionQueue::addIfReady(const DynInstPtr &inst) +InstructionQueue::addIfReady(const O3DynInstPtr &inst) { // If the instruction now has all of its source registers // available, then add it to the list of ready instructions. diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh index e7e1f274aa..eb76e655a3 100644 --- a/src/cpu/o3/lsq.hh +++ b/src/cpu/o3/lsq.hh @@ -53,6 +53,8 @@ #include "base/flags.hh" #include "base/types.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/impl.hh" #include "cpu/utils.hh" #include "enums/SMTQueuePolicy.hh" #include "mem/port.hh" @@ -74,7 +76,6 @@ class LSQ { public: typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; class LSQRequest; /** Derived class to hold any sender state the LSQ needs. */ @@ -93,7 +94,7 @@ class LSQ public: /** Instruction which initiated the access to memory. */ - DynInstPtr inst; + O3DynInstPtr inst; /** The main packet from a split load, used during writeback. 
*/ PacketPtr mainPkt; /** A second packet from a split store that needs sending. */ @@ -113,7 +114,7 @@ class LSQ * case the SenderState knows. */ bool deleted; - ContextID contextId() { return inst->contextId(); } + ContextID contextId(); /** Completes a packet and returns whether the access is finished. */ inline bool isComplete() { return outstanding == 0; } @@ -293,7 +294,7 @@ class LSQ public: LSQUnit& _port; - const DynInstPtr _inst; + const O3DynInstPtr _inst; uint32_t _taskId; PacketDataPtr _data; std::vector _packets; @@ -308,38 +309,11 @@ class LSQ AtomicOpFunctorPtr _amo_op; protected: LSQUnit* lsqUnit() { return &_port; } - LSQRequest(LSQUnit *port, const DynInstPtr& inst, bool isLoad) : - _state(State::NotIssued), _senderState(nullptr), - _port(*port), _inst(inst), _data(nullptr), - _res(nullptr), _addr(0), _size(0), _flags(0), - _numOutstandingPackets(0), _amo_op(nullptr) - { - flags.set(Flag::IsLoad, isLoad); - flags.set(Flag::WbStore, - _inst->isStoreConditional() || _inst->isAtomic()); - flags.set(Flag::IsAtomic, _inst->isAtomic()); - install(); - } - LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad, - const Addr& addr, const uint32_t& size, - const Request::Flags& flags_, - PacketDataPtr data = nullptr, uint64_t* res = nullptr, - AtomicOpFunctorPtr amo_op = nullptr) - : _state(State::NotIssued), _senderState(nullptr), - numTranslatedFragments(0), - numInTranslationFragments(0), - _port(*port), _inst(inst), _data(data), - _res(res), _addr(addr), _size(size), - _flags(flags_), - _numOutstandingPackets(0), - _amo_op(std::move(amo_op)) - { - flags.set(Flag::IsLoad, isLoad); - flags.set(Flag::WbStore, - _inst->isStoreConditional() || _inst->isAtomic()); - flags.set(Flag::IsAtomic, _inst->isAtomic()); - install(); - } + LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad); + LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad, + const Addr& addr, const uint32_t& size, + const Request::Flags& flags_, PacketDataPtr 
data=nullptr, + uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr); bool isLoad() const @@ -354,21 +328,9 @@ class LSQ } /** Install the request in the LQ/SQ. */ - void install() - { - if (isLoad()) { - _port.loadQueue[_inst->lqIdx].setRequest(this); - } else { - // Store, StoreConditional, and Atomic requests are pushed - // to this storeQueue - _port.storeQueue[_inst->sqIdx].setRequest(this); - } - } - virtual bool - squashed() const override - { - return _inst->isSquashed(); - } + void install(); + + bool squashed() const override; /** * Test if the LSQRequest has been released, i.e. self-owned. @@ -391,7 +353,8 @@ class LSQ * but there is any in-flight translation request to the TLB or access * request to the memory. */ - void release(Flag reason) + void + release(Flag reason) { assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded); if (!isAnyOutstandingRequest()) { @@ -410,35 +373,14 @@ class LSQ * The request is only added if the mask is empty or if there is at * least an active element in it. */ - void - addRequest(Addr addr, unsigned size, - const std::vector& byte_enable) - { - if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) { - auto request = std::make_shared( - addr, size, _flags, _inst->requestorId(), - _inst->instAddr(), _inst->contextId(), - std::move(_amo_op)); - request->setByteEnable(byte_enable); - _requests.push_back(request); - } - } + void addRequest(Addr addr, unsigned size, + const std::vector& byte_enable); /** Destructor. * The LSQRequest owns the request. If the packet has already been * sent, the sender state will be deleted upon receiving the reply. */ - virtual ~LSQRequest() - { - assert(!isAnyOutstandingRequest()); - _inst->savedReq = nullptr; - if (_senderState) - delete _senderState; - - for (auto r: _packets) - delete r; - }; - + virtual ~LSQRequest(); public: /** Convenience getters/setters. 
*/ @@ -450,7 +392,7 @@ class LSQ request()->setContext(context_id); } - const DynInstPtr& + const O3DynInstPtr& instruction() { return _inst; @@ -728,7 +670,7 @@ class LSQ using LSQRequest::_numOutstandingPackets; using LSQRequest::_amo_op; public: - SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, + SingleDataRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad, const Addr& addr, const uint32_t& size, const Request::Flags& flags_, PacketDataPtr data=nullptr, uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr) : @@ -766,7 +708,7 @@ class LSQ using LSQRequest::flags; using LSQRequest::setState; public: - HtmCmdRequest(LSQUnit* port, const DynInstPtr& inst, + HtmCmdRequest(LSQUnit* port, const O3DynInstPtr& inst, const Request::Flags& flags_); inline virtual ~HtmCmdRequest() {} virtual void initiateTranslation(); @@ -813,7 +755,7 @@ class LSQ PacketPtr _mainPacket; public: - SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, + SplitDataRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad, const Addr& addr, const uint32_t& size, const Request::Flags & flags_, PacketDataPtr data=nullptr, uint64_t* res=nullptr) : @@ -876,15 +818,15 @@ class LSQ void tick(); /** Inserts a load into the LSQ. */ - void insertLoad(const DynInstPtr &load_inst); + void insertLoad(const O3DynInstPtr &load_inst); /** Inserts a store into the LSQ. */ - void insertStore(const DynInstPtr &store_inst); + void insertStore(const O3DynInstPtr &store_inst); /** Executes a load. */ - Fault executeLoad(const DynInstPtr &inst); + Fault executeLoad(const O3DynInstPtr &inst); /** Executes a store. */ - Fault executeStore(const DynInstPtr &inst); + Fault executeStore(const O3DynInstPtr &inst); /** * Commits loads up until the given sequence number for a specific thread. @@ -924,7 +866,7 @@ class LSQ bool violation(ThreadID tid) { return thread.at(tid).violation(); } /** Gets the instruction that caused the memory ordering violation. 
*/ - DynInstPtr + O3DynInstPtr getMemDepViolator(ThreadID tid) { return thread.at(tid).getMemDepViolator(); @@ -1103,7 +1045,7 @@ class LSQ void recvTimingSnoopReq(PacketPtr pkt); - Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data, + Fault pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector& byte_enable); diff --git a/src/cpu/o3/lsq_impl.hh b/src/cpu/o3/lsq_impl.hh index 452a679d8b..5cde78e0f8 100644 --- a/src/cpu/o3/lsq_impl.hh +++ b/src/cpu/o3/lsq_impl.hh @@ -49,6 +49,7 @@ #include "base/compiler.hh" #include "base/logging.hh" #include "cpu/o3/cpu.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/o3/iew.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/lsq.hh" @@ -59,6 +60,13 @@ #include "debug/Writeback.hh" #include "params/DerivO3CPU.hh" +template +ContextID +LSQ::LSQSenderState::contextId() +{ + return inst->contextId(); +} + template LSQ::LSQ(O3CPU *cpu_ptr, DefaultIEW *iew_ptr, const DerivO3CPUParams ¶ms) @@ -220,7 +228,7 @@ LSQ::cachePortBusy(bool is_load) template void -LSQ::insertLoad(const DynInstPtr &load_inst) +LSQ::insertLoad(const O3DynInstPtr &load_inst) { ThreadID tid = load_inst->threadNumber; @@ -229,7 +237,7 @@ LSQ::insertLoad(const DynInstPtr &load_inst) template void -LSQ::insertStore(const DynInstPtr &store_inst) +LSQ::insertStore(const O3DynInstPtr &store_inst) { ThreadID tid = store_inst->threadNumber; @@ -238,7 +246,7 @@ LSQ::insertStore(const DynInstPtr &store_inst) template Fault -LSQ::executeLoad(const DynInstPtr &inst) +LSQ::executeLoad(const O3DynInstPtr &inst) { ThreadID tid = inst->threadNumber; @@ -247,7 +255,7 @@ LSQ::executeLoad(const DynInstPtr &inst) template Fault -LSQ::executeStore(const DynInstPtr &inst) +LSQ::executeStore(const O3DynInstPtr &inst) { ThreadID tid = inst->threadNumber; @@ -676,7 +684,7 @@ LSQ::dumpInsts() const template Fault -LSQ::pushRequest(const DynInstPtr& inst, 
bool isLoad, uint8_t *data, +LSQ::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data, unsigned int size, Addr addr, Request::Flags flags, uint64_t *res, AtomicOpFunctorPtr amo_op, const std::vector& byte_enable) @@ -951,6 +959,85 @@ LSQ::SplitDataRequest::initiateTranslation() } } +template +LSQ::LSQRequest::LSQRequest( + LSQUnit *port, const O3DynInstPtr& inst, bool isLoad) : + _state(State::NotIssued), _senderState(nullptr), + _port(*port), _inst(inst), _data(nullptr), + _res(nullptr), _addr(0), _size(0), _flags(0), + _numOutstandingPackets(0), _amo_op(nullptr) +{ + flags.set(Flag::IsLoad, isLoad); + flags.set(Flag::WbStore, + _inst->isStoreConditional() || _inst->isAtomic()); + flags.set(Flag::IsAtomic, _inst->isAtomic()); + install(); +} + +template +LSQ::LSQRequest::LSQRequest( + LSQUnit* port, const O3DynInstPtr& inst, bool isLoad, + const Addr& addr, const uint32_t& size, const Request::Flags& flags_, + PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op) + : _state(State::NotIssued), _senderState(nullptr), + numTranslatedFragments(0), + numInTranslationFragments(0), + _port(*port), _inst(inst), _data(data), + _res(res), _addr(addr), _size(size), + _flags(flags_), + _numOutstandingPackets(0), + _amo_op(std::move(amo_op)) +{ + flags.set(Flag::IsLoad, isLoad); + flags.set(Flag::WbStore, + _inst->isStoreConditional() || _inst->isAtomic()); + flags.set(Flag::IsAtomic, _inst->isAtomic()); + install(); +} + +template +void +LSQ::LSQRequest::install() +{ + if (isLoad()) { + _port.loadQueue[_inst->lqIdx].setRequest(this); + } else { + // Store, StoreConditional, and Atomic requests are pushed + // to this storeQueue + _port.storeQueue[_inst->sqIdx].setRequest(this); + } +} + +template +bool LSQ::LSQRequest::squashed() const { return _inst->isSquashed(); } + +template +void +LSQ::LSQRequest::addRequest(Addr addr, unsigned size, + const std::vector& byte_enable) +{ + if (isAnyActiveElement(byte_enable.begin(), byte_enable.end())) { + auto 
request = std::make_shared( + addr, size, _flags, _inst->requestorId(), + _inst->instAddr(), _inst->contextId(), + std::move(_amo_op)); + request->setByteEnable(byte_enable); + _requests.push_back(request); + } +} + +template +LSQ::LSQRequest::~LSQRequest() +{ + assert(!isAnyOutstandingRequest()); + _inst->savedReq = nullptr; + if (_senderState) + delete _senderState; + + for (auto r: _packets) + delete r; +}; + template void LSQ::LSQRequest::sendFragmentToTranslation(int i) @@ -1226,7 +1313,7 @@ LSQ::DcachePort::recvReqRetry() template LSQ::HtmCmdRequest::HtmCmdRequest(LSQUnit* port, - const DynInstPtr& inst, + const O3DynInstPtr& inst, const Request::Flags& flags_) : SingleDataRequest(port, inst, true, 0x0lu, 8, flags_, nullptr, nullptr, nullptr) diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index eda044d0d6..4d3c41958e 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -53,6 +53,7 @@ #include "arch/locked_mem.hh" #include "config/the_isa.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/lsq.hh" #include "cpu/timebuf.hh" #include "debug/HtmCpu.hh" @@ -85,7 +86,6 @@ class LSQUnit static constexpr auto MaxDataBytes = MaxVecRegLenInBytes; typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::IssueStruct IssueStruct; using LSQSenderState = typename LSQ::LSQSenderState; @@ -95,23 +95,17 @@ class LSQUnit { private: /** The instruction. */ - DynInstPtr inst; + O3DynInstPtr inst; /** The request. */ - LSQRequest* req; + LSQRequest* req = nullptr; /** The size of the operation. */ - uint32_t _size; + uint32_t _size = 0; /** Valid entry. */ - bool _valid; - public: - /** Constructs an empty store queue entry. 
*/ - LSQEntry() - : inst(nullptr), req(nullptr), _size(0), _valid(false) - { - } + bool _valid = false; + public: ~LSQEntry() { - inst = nullptr; if (req != nullptr) { req->freeLSQEntry(); req = nullptr; @@ -131,13 +125,14 @@ class LSQUnit } void - set(const DynInstPtr& inst) + set(const O3DynInstPtr& inst) { assert(!_valid); this->inst = inst; _valid = true; _size = 0; } + LSQRequest* request() { return req; } void setRequest(LSQRequest* r) { req = r; } bool hasRequest() { return req != nullptr; } @@ -146,7 +141,7 @@ class LSQUnit bool valid() const { return _valid; } uint32_t& size() { return _size; } const uint32_t& size() const { return _size; } - const DynInstPtr& instruction() const { return inst; } + const O3DynInstPtr& instruction() const { return inst; } /** @} */ }; @@ -156,32 +151,27 @@ class LSQUnit /** The store data. */ char _data[MaxDataBytes]; /** Whether or not the store can writeback. */ - bool _canWB; + bool _canWB = false; /** Whether or not the store is committed. */ - bool _committed; + bool _committed = false; /** Whether or not the store is completed. */ - bool _completed; + bool _completed = false; /** Does this request write all zeros and thus doesn't * have any data attached to it. Used for cache block zero * style instructs (ARM DC ZVA; ALPHA WH64) */ - bool _isAllZeros; + bool _isAllZeros = false; + public: static constexpr size_t DataSize = sizeof(_data); /** Constructs an empty store queue entry. */ SQEntry() - : _canWB(false), _committed(false), _completed(false), - _isAllZeros(false) { std::memset(_data, 0, DataSize); } - ~SQEntry() - { - } - void - set(const DynInstPtr& inst) + set(const O3DynInstPtr& inst) { LSQEntry::set(inst); } @@ -192,6 +182,7 @@ class LSQUnit LSQEntry::clear(); _canWB = _completed = _committed = _isAllZeros = false; } + /** Member accessors. */ /** @{ */ bool& canWB() { return _canWB; } @@ -250,11 +241,11 @@ class LSQUnit void takeOverFrom(); /** Inserts an instruction. 
*/ - void insert(const DynInstPtr &inst); + void insert(const O3DynInstPtr &inst); /** Inserts a load instruction. */ - void insertLoad(const DynInstPtr &load_inst); + void insertLoad(const O3DynInstPtr &load_inst); /** Inserts a store instruction. */ - void insertStore(const DynInstPtr &store_inst); + void insertStore(const O3DynInstPtr &store_inst); /** Check for ordering violations in the LSQ. For a store squash if we * ever find a conflicting load. For a load, only squash if we @@ -263,7 +254,7 @@ class LSQUnit * @param inst the instruction to check */ Fault checkViolations(typename LoadQueue::iterator& loadIt, - const DynInstPtr& inst); + const O3DynInstPtr& inst); /** Check if an incoming invalidate hits in the lsq on a load * that might have issued out of order wrt another load beacuse @@ -272,11 +263,11 @@ class LSQUnit void checkSnoop(PacketPtr pkt); /** Executes a load instruction. */ - Fault executeLoad(const DynInstPtr &inst); + Fault executeLoad(const O3DynInstPtr &inst); Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; } /** Executes a store instruction. */ - Fault executeStore(const DynInstPtr &inst); + Fault executeStore(const O3DynInstPtr &inst); /** Commits the head load. */ void commitLoad(); @@ -302,7 +293,7 @@ class LSQUnit bool violation() { return memDepViolator; } /** Returns the memory ordering violator. */ - DynInstPtr getMemDepViolator(); + O3DynInstPtr getMemDepViolator(); /** Returns the number of free LQ entries. */ unsigned numFreeLoadEntries(); @@ -378,7 +369,7 @@ class LSQUnit void resetState(); /** Writes back the instruction, sending it to IEW. */ - void writeback(const DynInstPtr &inst, PacketPtr pkt); + void writeback(const O3DynInstPtr &inst, PacketPtr pkt); /** Try to finish a previously blocked write back attempt */ void writebackBlockedStore(); @@ -460,7 +451,7 @@ class LSQUnit { public: /** Constructs a writeback event. 
*/ - WritebackEvent(const DynInstPtr &_inst, PacketPtr pkt, + WritebackEvent(const O3DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr); /** Processes the writeback event. */ @@ -471,7 +462,7 @@ class LSQUnit private: /** Instruction whose results are being written back. */ - DynInstPtr inst; + O3DynInstPtr inst; /** The packet that would have been sent to memory. */ PacketPtr pkt; @@ -552,7 +543,7 @@ class LSQUnit bool storeInFlight; /** The oldest load that caused a memory ordering violation. */ - DynInstPtr memDepViolator; + O3DynInstPtr memDepViolator; /** Whether or not there is a packet that couldn't be sent because of * a lack of cache ports. */ @@ -634,357 +625,4 @@ class LSQUnit typedef CircularQueue SQueue; }; -template -Fault -LSQUnit::read(LSQRequest *req, int load_idx) -{ - LQEntry& load_req = loadQueue[load_idx]; - const DynInstPtr& load_inst = load_req.instruction(); - - load_req.setRequest(req); - assert(load_inst); - - assert(!load_inst->isExecuted()); - - // Make sure this isn't a strictly ordered load - // A bit of a hackish way to get strictly ordered accesses to work - // only if they're at the head of the LSQ and are ready to commit - // (at the head of the ROB too). - - if (req->mainRequest()->isStrictlyOrdered() && - (load_idx != loadQueue.head() || !load_inst->isAtCommit())) { - // Tell IQ/mem dep unit that this instruction will need to be - // rescheduled eventually - iewStage->rescheduleMemInst(load_inst); - load_inst->clearIssued(); - load_inst->effAddrValid(false); - ++stats.rescheduledLoads; - DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n", - load_inst->seqNum, load_inst->pcState()); - - // Must delete request now that it wasn't handed off to - // memory. This is quite ugly. @todo: Figure out the proper - // place to really handle request deletes. 
- load_req.setRequest(nullptr); - req->discard(); - return std::make_shared( - "Strictly ordered load [sn:%llx] PC %s\n", - load_inst->seqNum, load_inst->pcState()); - } - - DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " - "storeHead: %i addr: %#x%s\n", - load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1, - req->mainRequest()->getPaddr(), req->isSplit() ? " split" : ""); - - if (req->mainRequest()->isLLSC()) { - // Disable recording the result temporarily. Writing to misc - // regs normally updates the result, but this is not the - // desired behavior when handling store conditionals. - load_inst->recordResult(false); - TheISA::handleLockedRead(load_inst.get(), req->mainRequest()); - load_inst->recordResult(true); - } - - if (req->mainRequest()->isLocalAccess()) { - assert(!load_inst->memData); - assert(!load_inst->inHtmTransactionalState()); - load_inst->memData = new uint8_t[MaxDataBytes]; - - ThreadContext *thread = cpu->tcBase(lsqID); - PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq); - - main_pkt->dataStatic(load_inst->memData); - - Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt); - - WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this); - cpu->schedule(wb, cpu->clockEdge(delay)); - return NoFault; - } - - // hardware transactional memory - if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit()) - { - // don't want to send nested transactionStarts and - // transactionStops outside of core, e.g. to Ruby - if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) { - Cycles delay(0); - PacketPtr data_pkt = - new Packet(req->mainRequest(), MemCmd::ReadReq); - - // Allocate memory if this is the first time a load is issued. 
- if (!load_inst->memData) { - load_inst->memData = - new uint8_t[req->mainRequest()->getSize()]; - // sanity checks espect zero in request's data - memset(load_inst->memData, 0, req->mainRequest()->getSize()); - } - - data_pkt->dataStatic(load_inst->memData); - if (load_inst->inHtmTransactionalState()) { - data_pkt->setHtmTransactional( - load_inst->getHtmTransactionUid()); - } - data_pkt->makeResponse(); - - WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); - cpu->schedule(wb, cpu->clockEdge(delay)); - return NoFault; - } - } - - // Check the SQ for any previous stores that might lead to forwarding - auto store_it = load_inst->sqIt; - assert (store_it >= storeWBIt); - // End once we've reached the top of the LSQ - while (store_it != storeWBIt) { - // Move the index to one younger - store_it--; - assert(store_it->valid()); - assert(store_it->instruction()->seqNum < load_inst->seqNum); - int store_size = store_it->size(); - - // Cache maintenance instructions go down via the store - // path but they carry no data and they shouldn't be - // considered for forwarding - if (store_size != 0 && !store_it->instruction()->strictlyOrdered() && - !(store_it->request()->mainRequest() && - store_it->request()->mainRequest()->isCacheMaintenance())) { - assert(store_it->instruction()->effAddrValid()); - - // Check if the store data is within the lower and upper bounds of - // addresses that the request needs. 
- auto req_s = req->mainRequest()->getVaddr(); - auto req_e = req_s + req->mainRequest()->getSize(); - auto st_s = store_it->instruction()->effAddr; - auto st_e = st_s + store_size; - - bool store_has_lower_limit = req_s >= st_s; - bool store_has_upper_limit = req_e <= st_e; - bool lower_load_has_store_part = req_s < st_e; - bool upper_load_has_store_part = req_e > st_s; - - auto coverage = AddrRangeCoverage::NoAddrRangeCoverage; - - // If the store entry is not atomic (atomic does not have valid - // data), the store has all of the data needed, and - // the load is not LLSC, then - // we can forward data from the store to the load - if (!store_it->instruction()->isAtomic() && - store_has_lower_limit && store_has_upper_limit && - !req->mainRequest()->isLLSC()) { - - const auto& store_req = store_it->request()->mainRequest(); - coverage = store_req->isMasked() ? - AddrRangeCoverage::PartialAddrRangeCoverage : - AddrRangeCoverage::FullAddrRangeCoverage; - } else if ( - // This is the partial store-load forwarding case where a store - // has only part of the load's data and the load isn't LLSC - (!req->mainRequest()->isLLSC() && - ((store_has_lower_limit && lower_load_has_store_part) || - (store_has_upper_limit && upper_load_has_store_part) || - (lower_load_has_store_part && upper_load_has_store_part))) || - // The load is LLSC, and the store has all or part of the - // load's data - (req->mainRequest()->isLLSC() && - ((store_has_lower_limit || upper_load_has_store_part) && - (store_has_upper_limit || lower_load_has_store_part))) || - // The store entry is atomic and has all or part of the load's - // data - (store_it->instruction()->isAtomic() && - ((store_has_lower_limit || upper_load_has_store_part) && - (store_has_upper_limit || lower_load_has_store_part)))) { - - coverage = AddrRangeCoverage::PartialAddrRangeCoverage; - } - - if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) { - // Get shift amount for offset into the store's data. 
- int shift_amt = req->mainRequest()->getVaddr() - - store_it->instruction()->effAddr; - - // Allocate memory if this is the first time a load is issued. - if (!load_inst->memData) { - load_inst->memData = - new uint8_t[req->mainRequest()->getSize()]; - } - if (store_it->isAllZeros()) - memset(load_inst->memData, 0, - req->mainRequest()->getSize()); - else - memcpy(load_inst->memData, - store_it->data() + shift_amt, - req->mainRequest()->getSize()); - - DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " - "addr %#x\n", store_it._idx, - req->mainRequest()->getVaddr()); - - PacketPtr data_pkt = new Packet(req->mainRequest(), - MemCmd::ReadReq); - data_pkt->dataStatic(load_inst->memData); - - // hardware transactional memory - // Store to load forwarding within a transaction - // This should be okay because the store will be sent to - // the memory subsystem and subsequently get added to the - // write set of the transaction. The write set has a stronger - // property than the read set, so the load doesn't necessarily - // have to be there. - assert(!req->mainRequest()->isHTMCmd()); - if (load_inst->inHtmTransactionalState()) { - assert (!storeQueue[store_it._idx].completed()); - assert ( - storeQueue[store_it._idx].instruction()-> - inHtmTransactionalState()); - assert ( - load_inst->getHtmTransactionUid() == - storeQueue[store_it._idx].instruction()-> - getHtmTransactionUid()); - data_pkt->setHtmTransactional( - load_inst->getHtmTransactionUid()); - DPRINTF(HtmCpu, "HTM LD (ST2LDF) " - "pc=0x%lx - vaddr=0x%lx - " - "paddr=0x%lx - htmUid=%u\n", - load_inst->instAddr(), - data_pkt->req->hasVaddr() ? - data_pkt->req->getVaddr() : 0lu, - data_pkt->getAddr(), - load_inst->getHtmTransactionUid()); - } - - if (req->isAnyOutstandingRequest()) { - assert(req->_numOutstandingPackets > 0); - // There are memory requests packets in flight already. - // This may happen if the store was not complete the - // first time this load got executed. 
Signal the senderSate - // that response packets should be discarded. - req->discardSenderState(); - } - - WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, - this); - - // We'll say this has a 1 cycle load-store forwarding latency - // for now. - // @todo: Need to make this a parameter. - cpu->schedule(wb, curTick()); - - // Don't need to do anything special for split loads. - ++stats.forwLoads; - - return NoFault; - } else if (coverage == AddrRangeCoverage::PartialAddrRangeCoverage) { - // If it's already been written back, then don't worry about - // stalling on it. - if (store_it->completed()) { - panic("Should not check one of these"); - continue; - } - - // Must stall load and force it to retry, so long as it's the - // oldest load that needs to do so. - if (!stalled || - (stalled && - load_inst->seqNum < - loadQueue[stallingLoadIdx].instruction()->seqNum)) { - stalled = true; - stallingStoreIsn = store_it->instruction()->seqNum; - stallingLoadIdx = load_idx; - } - - // Tell IQ/mem dep unit that this instruction will need to be - // rescheduled eventually - iewStage->rescheduleMemInst(load_inst); - load_inst->clearIssued(); - load_inst->effAddrValid(false); - ++stats.rescheduledLoads; - - // Do not generate a writeback event as this instruction is not - // complete. - DPRINTF(LSQUnit, "Load-store forwarding mis-match. " - "Store idx %i to load addr %#x\n", - store_it._idx, req->mainRequest()->getVaddr()); - - // Must discard the request. - req->discard(); - load_req.setRequest(nullptr); - return NoFault; - } - } - } - - // If there's no forwarding case, then go access memory - DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n", - load_inst->seqNum, load_inst->pcState()); - - // Allocate memory if this is the first time a load is issued. 
- if (!load_inst->memData) { - load_inst->memData = new uint8_t[req->mainRequest()->getSize()]; - } - - - // hardware transactional memory - if (req->mainRequest()->isHTMCmd()) { - // this is a simple sanity check - // the Ruby cache controller will set - // memData to 0x0ul if successful. - *load_inst->memData = (uint64_t) 0x1ull; - } - - // For now, load throughput is constrained by the number of - // load FUs only, and loads do not consume a cache port (only - // stores do). - // @todo We should account for cache port contention - // and arbitrate between loads and stores. - - // if we the cache is not blocked, do cache access - if (req->senderState() == nullptr) { - LQSenderState *state = new LQSenderState( - loadQueue.getIterator(load_idx)); - state->isLoad = true; - state->inst = load_inst; - state->isSplit = req->isSplit(); - req->senderState(state); - } - req->buildPackets(); - req->sendPacketToCache(); - if (!req->isSent()) - iewStage->blockMemInst(load_inst); - - return NoFault; -} - -template -Fault -LSQUnit::write(LSQRequest *req, uint8_t *data, int store_idx) -{ - assert(storeQueue[store_idx].valid()); - - DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i " - "[sn:%llu]\n", - store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1, - storeQueue[store_idx].instruction()->seqNum); - - storeQueue[store_idx].setRequest(req); - unsigned size = req->_size; - storeQueue[store_idx].size() = size; - bool store_no_data = - req->mainRequest()->getFlags() & Request::STORE_NO_DATA; - storeQueue[store_idx].isAllZeros() = store_no_data; - assert(size <= SQEntry::DataSize || store_no_data); - - // copy data into the storeQueue only if the store request has valid data - if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) && - !req->request()->isCacheMaintenance() && - !req->request()->isAtomic()) - memcpy(storeQueue[store_idx].data(), data, size); - - // This function only writes the data to the store queue, so no fault - // can 
happen here. - return NoFault; -} - #endif // __CPU_O3_LSQ_UNIT_HH__ diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh index bafd88e4b3..174916df99 100644 --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -60,7 +60,7 @@ #include "mem/request.hh" template -LSQUnit::WritebackEvent::WritebackEvent(const DynInstPtr &_inst, +LSQUnit::WritebackEvent::WritebackEvent(const O3DynInstPtr &_inst, PacketPtr _pkt, LSQUnit *lsq_ptr) : Event(Default_Pri, AutoDelete), inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) @@ -112,7 +112,7 @@ void LSQUnit::completeDataAccess(PacketPtr pkt) { LSQSenderState *state = dynamic_cast(pkt->senderState); - DynInstPtr inst = state->inst; + O3DynInstPtr inst = state->inst; // hardware transactional memory // sanity check @@ -317,7 +317,7 @@ LSQUnit::takeOverFrom() template void -LSQUnit::insert(const DynInstPtr &inst) +LSQUnit::insert(const O3DynInstPtr &inst) { assert(inst->isMemRef()); @@ -334,7 +334,7 @@ LSQUnit::insert(const DynInstPtr &inst) template void -LSQUnit::insertLoad(const DynInstPtr &load_inst) +LSQUnit::insertLoad(const O3DynInstPtr &load_inst) { assert(!loadQueue.full()); assert(loads < loadQueue.capacity()); @@ -397,7 +397,7 @@ LSQUnit::insertLoad(const DynInstPtr &load_inst) template void -LSQUnit::insertStore(const DynInstPtr& store_inst) +LSQUnit::insertStore(const O3DynInstPtr& store_inst) { // Make sure it is not full before inserting an instruction. 
assert(!storeQueue.full()); @@ -418,10 +418,10 @@ LSQUnit::insertStore(const DynInstPtr& store_inst) } template -typename Impl::DynInstPtr +O3DynInstPtr LSQUnit::getMemDepViolator() { - DynInstPtr temp = memDepViolator; + O3DynInstPtr temp = memDepViolator; memDepViolator = NULL; @@ -475,7 +475,7 @@ LSQUnit::checkSnoop(PacketPtr pkt) Addr invalidate_addr = pkt->getAddr() & cacheBlockMask; - DynInstPtr ld_inst = iter->instruction(); + O3DynInstPtr ld_inst = iter->instruction(); assert(ld_inst); LSQRequest *req = iter->request(); @@ -535,7 +535,7 @@ LSQUnit::checkSnoop(PacketPtr pkt) template Fault LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt, - const DynInstPtr& inst) + const O3DynInstPtr& inst) { Addr inst_eff_addr1 = inst->effAddr >> depCheckShift; Addr inst_eff_addr2 = (inst->effAddr + inst->effSize - 1) >> depCheckShift; @@ -546,7 +546,7 @@ LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt, * like the implementation that came before it, we're overly conservative. */ while (loadIt != loadQueue.end()) { - DynInstPtr ld_inst = loadIt->instruction(); + O3DynInstPtr ld_inst = loadIt->instruction(); if (!ld_inst->effAddrValid() || ld_inst->strictlyOrdered()) { ++loadIt; continue; @@ -615,7 +615,7 @@ LSQUnit::checkViolations(typename LoadQueue::iterator& loadIt, template Fault -LSQUnit::executeLoad(const DynInstPtr &inst) +LSQUnit::executeLoad(const O3DynInstPtr &inst) { // Execute a specific load. Fault load_fault = NoFault; @@ -682,7 +682,7 @@ LSQUnit::executeLoad(const DynInstPtr &inst) template Fault -LSQUnit::executeStore(const DynInstPtr &store_inst) +LSQUnit::executeStore(const O3DynInstPtr &store_inst) { // Make sure that a store exists. 
assert(stores != 0); @@ -837,7 +837,7 @@ LSQUnit::writebackStores() assert(storeWBIt->hasRequest()); assert(!storeWBIt->committed()); - DynInstPtr inst = storeWBIt->instruction(); + O3DynInstPtr inst = storeWBIt->instruction(); LSQRequest* req = storeWBIt->request(); // Process store conditionals or store release after all previous @@ -1095,7 +1095,7 @@ LSQUnit::storePostSend() template void -LSQUnit::writeback(const DynInstPtr &inst, PacketPtr pkt) +LSQUnit::writeback(const O3DynInstPtr &inst, PacketPtr pkt) { iewStage->wakeCPU(); @@ -1170,7 +1170,7 @@ LSQUnit::completeStore(typename StoreQueue::iterator store_idx) /* We 'need' a copy here because we may clear the entry from the * store queue. */ - DynInstPtr store_inst = store_idx->instruction(); + O3DynInstPtr store_inst = store_idx->instruction(); if (store_idx == storeQueue.begin()) { do { storeQueue.front().clear(); @@ -1279,7 +1279,7 @@ LSQUnit::dumpInsts() const cprintf("Load queue: "); for (const auto& e: loadQueue) { - const DynInstPtr &inst(e.instruction()); + const O3DynInstPtr &inst(e.instruction()); cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum); } cprintf("\n"); @@ -1288,7 +1288,7 @@ LSQUnit::dumpInsts() const cprintf("Store queue: "); for (const auto& e: storeQueue) { - const DynInstPtr &inst(e.instruction()); + const O3DynInstPtr &inst(e.instruction()); cprintf("%s.[sn:%llu] ", inst->pcState(), inst->seqNum); } @@ -1302,4 +1302,358 @@ LSQUnit::cacheLineSize() return cpu->cacheLineSize(); } +template +Fault +LSQUnit::read(LSQRequest *req, int load_idx) +{ + LQEntry& load_req = loadQueue[load_idx]; + const O3DynInstPtr& load_inst = load_req.instruction(); + + load_req.setRequest(req); + assert(load_inst); + + assert(!load_inst->isExecuted()); + + // Make sure this isn't a strictly ordered load + // A bit of a hackish way to get strictly ordered accesses to work + // only if they're at the head of the LSQ and are ready to commit + // (at the head of the ROB too). 
+ + if (req->mainRequest()->isStrictlyOrdered() && + (load_idx != loadQueue.head() || !load_inst->isAtCommit())) { + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + iewStage->rescheduleMemInst(load_inst); + load_inst->clearIssued(); + load_inst->effAddrValid(false); + ++stats.rescheduledLoads; + DPRINTF(LSQUnit, "Strictly ordered load [sn:%lli] PC %s\n", + load_inst->seqNum, load_inst->pcState()); + + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + load_req.setRequest(nullptr); + req->discard(); + return std::make_shared( + "Strictly ordered load [sn:%llx] PC %s\n", + load_inst->seqNum, load_inst->pcState()); + } + + DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, " + "storeHead: %i addr: %#x%s\n", + load_idx - 1, load_inst->sqIt._idx, storeQueue.head() - 1, + req->mainRequest()->getPaddr(), req->isSplit() ? " split" : ""); + + if (req->mainRequest()->isLLSC()) { + // Disable recording the result temporarily. Writing to misc + // regs normally updates the result, but this is not the + // desired behavior when handling store conditionals. 
+ load_inst->recordResult(false); + TheISA::handleLockedRead(load_inst.get(), req->mainRequest()); + load_inst->recordResult(true); + } + + if (req->mainRequest()->isLocalAccess()) { + assert(!load_inst->memData); + assert(!load_inst->inHtmTransactionalState()); + load_inst->memData = new uint8_t[MaxDataBytes]; + + ThreadContext *thread = cpu->tcBase(lsqID); + PacketPtr main_pkt = new Packet(req->mainRequest(), MemCmd::ReadReq); + + main_pkt->dataStatic(load_inst->memData); + + Cycles delay = req->mainRequest()->localAccessor(thread, main_pkt); + + WritebackEvent *wb = new WritebackEvent(load_inst, main_pkt, this); + cpu->schedule(wb, cpu->clockEdge(delay)); + return NoFault; + } + + // hardware transactional memory + if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit()) + { + // don't want to send nested transactionStarts and + // transactionStops outside of core, e.g. to Ruby + if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) { + Cycles delay(0); + PacketPtr data_pkt = + new Packet(req->mainRequest(), MemCmd::ReadReq); + + // Allocate memory if this is the first time a load is issued. 
+ if (!load_inst->memData) { + load_inst->memData = + new uint8_t[req->mainRequest()->getSize()]; + // sanity checks expect zero in request's data + memset(load_inst->memData, 0, req->mainRequest()->getSize()); + } + + data_pkt->dataStatic(load_inst->memData); + if (load_inst->inHtmTransactionalState()) { + data_pkt->setHtmTransactional( + load_inst->getHtmTransactionUid()); + } + data_pkt->makeResponse(); + + WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this); + cpu->schedule(wb, cpu->clockEdge(delay)); + return NoFault; + } + } + + // Check the SQ for any previous stores that might lead to forwarding + auto store_it = load_inst->sqIt; + assert (store_it >= storeWBIt); + // End once we've reached the top of the LSQ + while (store_it != storeWBIt) { + // Move the index to one younger + store_it--; + assert(store_it->valid()); + assert(store_it->instruction()->seqNum < load_inst->seqNum); + int store_size = store_it->size(); + + // Cache maintenance instructions go down via the store + // path but they carry no data and they shouldn't be + // considered for forwarding + if (store_size != 0 && !store_it->instruction()->strictlyOrdered() && + !(store_it->request()->mainRequest() && + store_it->request()->mainRequest()->isCacheMaintenance())) { + assert(store_it->instruction()->effAddrValid()); + + // Check if the store data is within the lower and upper bounds of + // addresses that the request needs. 
+ auto req_s = req->mainRequest()->getVaddr(); + auto req_e = req_s + req->mainRequest()->getSize(); + auto st_s = store_it->instruction()->effAddr; + auto st_e = st_s + store_size; + + bool store_has_lower_limit = req_s >= st_s; + bool store_has_upper_limit = req_e <= st_e; + bool lower_load_has_store_part = req_s < st_e; + bool upper_load_has_store_part = req_e > st_s; + + auto coverage = AddrRangeCoverage::NoAddrRangeCoverage; + + // If the store entry is not atomic (atomic does not have valid + // data), the store has all of the data needed, and + // the load is not LLSC, then + // we can forward data from the store to the load + if (!store_it->instruction()->isAtomic() && + store_has_lower_limit && store_has_upper_limit && + !req->mainRequest()->isLLSC()) { + + const auto& store_req = store_it->request()->mainRequest(); + coverage = store_req->isMasked() ? + AddrRangeCoverage::PartialAddrRangeCoverage : + AddrRangeCoverage::FullAddrRangeCoverage; + } else if ( + // This is the partial store-load forwarding case where a store + // has only part of the load's data and the load isn't LLSC + (!req->mainRequest()->isLLSC() && + ((store_has_lower_limit && lower_load_has_store_part) || + (store_has_upper_limit && upper_load_has_store_part) || + (lower_load_has_store_part && upper_load_has_store_part))) || + // The load is LLSC, and the store has all or part of the + // load's data + (req->mainRequest()->isLLSC() && + ((store_has_lower_limit || upper_load_has_store_part) && + (store_has_upper_limit || lower_load_has_store_part))) || + // The store entry is atomic and has all or part of the load's + // data + (store_it->instruction()->isAtomic() && + ((store_has_lower_limit || upper_load_has_store_part) && + (store_has_upper_limit || lower_load_has_store_part)))) { + + coverage = AddrRangeCoverage::PartialAddrRangeCoverage; + } + + if (coverage == AddrRangeCoverage::FullAddrRangeCoverage) { + // Get shift amount for offset into the store's data. 
+ int shift_amt = req->mainRequest()->getVaddr() - + store_it->instruction()->effAddr; + + // Allocate memory if this is the first time a load is issued. + if (!load_inst->memData) { + load_inst->memData = + new uint8_t[req->mainRequest()->getSize()]; + } + if (store_it->isAllZeros()) + memset(load_inst->memData, 0, + req->mainRequest()->getSize()); + else + memcpy(load_inst->memData, + store_it->data() + shift_amt, + req->mainRequest()->getSize()); + + DPRINTF(LSQUnit, "Forwarding from store idx %i to load to " + "addr %#x\n", store_it._idx, + req->mainRequest()->getVaddr()); + + PacketPtr data_pkt = new Packet(req->mainRequest(), + MemCmd::ReadReq); + data_pkt->dataStatic(load_inst->memData); + + // hardware transactional memory + // Store to load forwarding within a transaction + // This should be okay because the store will be sent to + // the memory subsystem and subsequently get added to the + // write set of the transaction. The write set has a stronger + // property than the read set, so the load doesn't necessarily + // have to be there. + assert(!req->mainRequest()->isHTMCmd()); + if (load_inst->inHtmTransactionalState()) { + assert (!storeQueue[store_it._idx].completed()); + assert ( + storeQueue[store_it._idx].instruction()-> + inHtmTransactionalState()); + assert ( + load_inst->getHtmTransactionUid() == + storeQueue[store_it._idx].instruction()-> + getHtmTransactionUid()); + data_pkt->setHtmTransactional( + load_inst->getHtmTransactionUid()); + DPRINTF(HtmCpu, "HTM LD (ST2LDF) " + "pc=0x%lx - vaddr=0x%lx - " + "paddr=0x%lx - htmUid=%u\n", + load_inst->instAddr(), + data_pkt->req->hasVaddr() ? + data_pkt->req->getVaddr() : 0lu, + data_pkt->getAddr(), + load_inst->getHtmTransactionUid()); + } + + if (req->isAnyOutstandingRequest()) { + assert(req->_numOutstandingPackets > 0); + // There are memory requests packets in flight already. + // This may happen if the store was not complete the + // first time this load got executed. 
Signal the senderSate + // that response packets should be discarded. + req->discardSenderState(); + } + + WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, + this); + + // We'll say this has a 1 cycle load-store forwarding latency + // for now. + // @todo: Need to make this a parameter. + cpu->schedule(wb, curTick()); + + // Don't need to do anything special for split loads. + ++stats.forwLoads; + + return NoFault; + } else if ( + coverage == AddrRangeCoverage::PartialAddrRangeCoverage) { + // If it's already been written back, then don't worry about + // stalling on it. + if (store_it->completed()) { + panic("Should not check one of these"); + continue; + } + + // Must stall load and force it to retry, so long as it's the + // oldest load that needs to do so. + if (!stalled || + (stalled && + load_inst->seqNum < + loadQueue[stallingLoadIdx].instruction()->seqNum)) { + stalled = true; + stallingStoreIsn = store_it->instruction()->seqNum; + stallingLoadIdx = load_idx; + } + + // Tell IQ/mem dep unit that this instruction will need to be + // rescheduled eventually + iewStage->rescheduleMemInst(load_inst); + load_inst->clearIssued(); + load_inst->effAddrValid(false); + ++stats.rescheduledLoads; + + // Do not generate a writeback event as this instruction is not + // complete. + DPRINTF(LSQUnit, "Load-store forwarding mis-match. " + "Store idx %i to load addr %#x\n", + store_it._idx, req->mainRequest()->getVaddr()); + + // Must discard the request. + req->discard(); + load_req.setRequest(nullptr); + return NoFault; + } + } + } + + // If there's no forwarding case, then go access memory + DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n", + load_inst->seqNum, load_inst->pcState()); + + // Allocate memory if this is the first time a load is issued. 
+ if (!load_inst->memData) { + load_inst->memData = new uint8_t[req->mainRequest()->getSize()]; + } + + + // hardware transactional memory + if (req->mainRequest()->isHTMCmd()) { + // this is a simple sanity check + // the Ruby cache controller will set + // memData to 0x0ul if successful. + *load_inst->memData = (uint64_t) 0x1ull; + } + + // For now, load throughput is constrained by the number of + // load FUs only, and loads do not consume a cache port (only + // stores do). + // @todo We should account for cache port contention + // and arbitrate between loads and stores. + + // if we the cache is not blocked, do cache access + if (req->senderState() == nullptr) { + LQSenderState *state = new LQSenderState( + loadQueue.getIterator(load_idx)); + state->isLoad = true; + state->inst = load_inst; + state->isSplit = req->isSplit(); + req->senderState(state); + } + req->buildPackets(); + req->sendPacketToCache(); + if (!req->isSent()) + iewStage->blockMemInst(load_inst); + + return NoFault; +} + +template +Fault +LSQUnit::write(LSQRequest *req, uint8_t *data, int store_idx) +{ + assert(storeQueue[store_idx].valid()); + + DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x | storeHead:%i " + "[sn:%llu]\n", + store_idx - 1, req->request()->getPaddr(), storeQueue.head() - 1, + storeQueue[store_idx].instruction()->seqNum); + + storeQueue[store_idx].setRequest(req); + unsigned size = req->_size; + storeQueue[store_idx].size() = size; + bool store_no_data = + req->mainRequest()->getFlags() & Request::STORE_NO_DATA; + storeQueue[store_idx].isAllZeros() = store_no_data; + assert(size <= SQEntry::DataSize || store_no_data); + + // copy data into the storeQueue only if the store request has valid data + if (!(req->request()->getFlags() & Request::CACHE_BLOCK_ZERO) && + !req->request()->isCacheMaintenance() && + !req->request()->isAtomic()) + memcpy(storeQueue[store_idx].data(), data, size); + + // This function only writes the data to the store queue, so no fault + // can 
happen here. + return NoFault; +} + #endif//__CPU_O3_LSQ_UNIT_IMPL_HH__ diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh index 8178a4913a..b2cf9bf8d2 100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@ -49,6 +49,7 @@ #include "base/statistics.hh" #include "cpu/inst_seq.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/limits.hh" #include "debug/MemDepUnit.hh" @@ -85,8 +86,6 @@ class MemDepUnit std::string _name; public: - typedef typename Impl::DynInstPtr DynInstPtr; - typedef typename Impl::DynInstConstPtr DynInstConstPtr; typedef typename Impl::O3CPU O3CPU; /** Empty constructor. Must call init() prior to using in this case. */ @@ -117,22 +116,22 @@ class MemDepUnit void setIQ(InstructionQueue *iq_ptr); /** Inserts a memory instruction. */ - void insert(const DynInstPtr &inst); + void insert(const O3DynInstPtr &inst); /** Inserts a non-speculative memory instruction. */ - void insertNonSpec(const DynInstPtr &inst); + void insertNonSpec(const O3DynInstPtr &inst); /** Inserts a barrier instruction. */ - void insertBarrier(const DynInstPtr &barr_inst); + void insertBarrier(const O3DynInstPtr &barr_inst); /** Indicate that an instruction has its registers ready. */ - void regsReady(const DynInstPtr &inst); + void regsReady(const O3DynInstPtr &inst); /** Indicate that a non-speculative instruction is ready. */ - void nonSpecInstReady(const DynInstPtr &inst); + void nonSpecInstReady(const O3DynInstPtr &inst); /** Reschedules an instruction to be re-executed. */ - void reschedule(const DynInstPtr &inst); + void reschedule(const O3DynInstPtr &inst); /** Replays all instructions that have been rescheduled by moving them to * the ready list. @@ -140,7 +139,7 @@ class MemDepUnit void replay(); /** Notifies completion of an instruction. */ - void completeInst(const DynInstPtr &inst); + void completeInst(const O3DynInstPtr &inst); /** Squashes all instructions up until a given sequence number for a * specific thread. 
@@ -148,11 +147,11 @@ class MemDepUnit void squash(const InstSeqNum &squashed_num, ThreadID tid); /** Indicates an ordering violation between a store and a younger load. */ - void violation(const DynInstPtr &store_inst, - const DynInstPtr &violating_load); + void violation(const O3DynInstPtr &store_inst, + const O3DynInstPtr &violating_load); /** Issues the given instruction */ - void issue(const DynInstPtr &inst); + void issue(const O3DynInstPtr &inst); /** Debugging function to dump the lists of instructions. */ void dumpLists(); @@ -160,12 +159,12 @@ class MemDepUnit private: /** Completes a memory instruction. */ - void completed(const DynInstPtr &inst); + void completed(const O3DynInstPtr &inst); /** Wakes any dependents of a memory instruction. */ - void wakeDependents(const DynInstPtr &inst); + void wakeDependents(const O3DynInstPtr &inst); - typedef typename std::list::iterator ListIt; + typedef typename std::list::iterator ListIt; class MemDepEntry; @@ -179,7 +178,7 @@ class MemDepUnit { public: /** Constructs a memory dependence entry. */ - MemDepEntry(const DynInstPtr &new_inst) + MemDepEntry(const O3DynInstPtr &new_inst) : inst(new_inst), regsReady(false), memDeps(0), completed(false), squashed(false) { @@ -209,7 +208,7 @@ class MemDepUnit std::string name() const { return "memdepentry"; } /** The instruction being tracked. */ - DynInstPtr inst; + O3DynInstPtr inst; /** The iterator to the instruction's location inside the list. */ ListIt listIt; @@ -235,10 +234,10 @@ class MemDepUnit }; /** Finds the memory dependence entry in the hash map. */ - inline MemDepEntryPtr &findInHash(const DynInstConstPtr& inst); + MemDepEntryPtr &findInHash(const O3DynInstConstPtr& inst); /** Moves an entry to the ready list. 
*/ - inline void moveToReady(MemDepEntryPtr &ready_inst_entry); + void moveToReady(MemDepEntryPtr &ready_inst_entry); typedef std::unordered_map MemDepHash; @@ -248,10 +247,10 @@ class MemDepUnit MemDepHash memDepHash; /** A list of all instructions in the memory dependence unit. */ - std::list instList[O3MaxThreads]; + std::list instList[O3MaxThreads]; /** A list of all instructions that are going to be replayed. */ - std::list instsToReplay; + std::list instsToReplay; /** The memory dependence predictor. It is accessed upon new * instructions being added to the IQ, and responds by telling @@ -273,7 +272,7 @@ class MemDepUnit bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); } /** Inserts the SN of a barrier inst. to the list of tracked barriers */ - void insertBarrierSN(const DynInstPtr &barr_inst); + void insertBarrierSN(const O3DynInstPtr &barr_inst); /** Pointer to the IQ. */ InstructionQueue *iqPtr; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh index 4f1f725229..34bba53d95 100644 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ b/src/cpu/o3/mem_dep_unit_impl.hh @@ -172,7 +172,7 @@ MemDepUnit::setIQ(InstructionQueue *iq_ptr) template void -MemDepUnit::insertBarrierSN(const DynInstPtr &barr_inst) +MemDepUnit::insertBarrierSN(const O3DynInstPtr &barr_inst) { InstSeqNum barr_sn = barr_inst->seqNum; @@ -205,7 +205,7 @@ MemDepUnit::insertBarrierSN(const DynInstPtr &barr_inst) template void -MemDepUnit::insert(const DynInstPtr &inst) +MemDepUnit::insert(const O3DynInstPtr &inst) { ThreadID tid = inst->threadNumber; @@ -316,7 +316,7 @@ MemDepUnit::insert(const DynInstPtr &inst) template void -MemDepUnit::insertNonSpec(const DynInstPtr &inst) +MemDepUnit::insertNonSpec(const O3DynInstPtr &inst) { insertBarrier(inst); @@ -338,7 +338,7 @@ MemDepUnit::insertNonSpec(const DynInstPtr &inst) template void -MemDepUnit::insertBarrier(const DynInstPtr &barr_inst) +MemDepUnit::insertBarrier(const O3DynInstPtr &barr_inst) { ThreadID tid = 
barr_inst->threadNumber; @@ -361,7 +361,7 @@ MemDepUnit::insertBarrier(const DynInstPtr &barr_inst) template void -MemDepUnit::regsReady(const DynInstPtr &inst) +MemDepUnit::regsReady(const O3DynInstPtr &inst) { DPRINTF(MemDepUnit, "Marking registers as ready for " "instruction PC %s [sn:%lli].\n", @@ -384,7 +384,7 @@ MemDepUnit::regsReady(const DynInstPtr &inst) template void -MemDepUnit::nonSpecInstReady(const DynInstPtr &inst) +MemDepUnit::nonSpecInstReady(const O3DynInstPtr &inst) { DPRINTF(MemDepUnit, "Marking non speculative " "instruction PC %s as ready [sn:%lli].\n", @@ -397,7 +397,7 @@ MemDepUnit::nonSpecInstReady(const DynInstPtr &inst) template void -MemDepUnit::reschedule(const DynInstPtr &inst) +MemDepUnit::reschedule(const O3DynInstPtr &inst) { instsToReplay.push_back(inst); } @@ -406,7 +406,7 @@ template void MemDepUnit::replay() { - DynInstPtr temp_inst; + O3DynInstPtr temp_inst; // For now this replay function replays all waiting memory ops. while (!instsToReplay.empty()) { @@ -425,7 +425,7 @@ MemDepUnit::replay() template void -MemDepUnit::completed(const DynInstPtr &inst) +MemDepUnit::completed(const O3DynInstPtr &inst) { DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n", inst->pcState(), inst->seqNum); @@ -449,7 +449,7 @@ MemDepUnit::completed(const DynInstPtr &inst) template void -MemDepUnit::completeInst(const DynInstPtr &inst) +MemDepUnit::completeInst(const O3DynInstPtr &inst) { wakeDependents(inst); completed(inst); @@ -481,7 +481,7 @@ MemDepUnit::completeInst(const DynInstPtr &inst) template void -MemDepUnit::wakeDependents(const DynInstPtr &inst) +MemDepUnit::wakeDependents(const O3DynInstPtr &inst) { // Only stores, atomics and barriers have dependents. 
if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() && @@ -570,8 +570,8 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, template void -MemDepUnit::violation(const DynInstPtr &store_inst, - const DynInstPtr &violating_load) +MemDepUnit::violation(const O3DynInstPtr &store_inst, + const O3DynInstPtr &violating_load) { DPRINTF(MemDepUnit, "Passing violating PCs to store sets," " load: %#x, store: %#x\n", violating_load->instAddr(), @@ -582,7 +582,7 @@ MemDepUnit::violation(const DynInstPtr &store_inst, template void -MemDepUnit::issue(const DynInstPtr &inst) +MemDepUnit::issue(const O3DynInstPtr &inst) { DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n", inst->instAddr(), inst->seqNum); @@ -592,7 +592,7 @@ MemDepUnit::issue(const DynInstPtr &inst) template inline typename MemDepUnit::MemDepEntryPtr & -MemDepUnit::findInHash(const DynInstConstPtr &inst) +MemDepUnit::findInHash(const O3DynInstConstPtr &inst) { MemDepHashIt hash_it = memDepHash.find(inst->seqNum); diff --git a/src/cpu/o3/probe/elastic_trace.cc b/src/cpu/o3/probe/elastic_trace.cc index 2ae7c97013..afea6135db 100644 --- a/src/cpu/o3/probe/elastic_trace.cc +++ b/src/cpu/o3/probe/elastic_trace.cc @@ -40,6 +40,7 @@ #include "base/callback.hh" #include "base/output.hh" #include "base/trace.hh" +#include "cpu/o3/dyn_inst.hh" #include "cpu/reg_class.hh" #include "debug/ElasticTrace.hh" #include "mem/packet.hh" @@ -124,21 +125,21 @@ ElasticTrace::regEtraceListeners() listeners.push_back(new ProbeListenerArg(this, "FetchRequest", &ElasticTrace::fetchReqTrace)); listeners.push_back(new ProbeListenerArg(this, "Execute", + O3DynInstConstPtr>(this, "Execute", &ElasticTrace::recordExecTick)); listeners.push_back(new ProbeListenerArg(this, "ToCommit", + O3DynInstConstPtr>(this, "ToCommit", &ElasticTrace::recordToCommTick)); listeners.push_back(new ProbeListenerArg(this, "Rename", + O3DynInstConstPtr>(this, "Rename", &ElasticTrace::updateRegDep)); listeners.push_back(new 
ProbeListenerArg(this, "SquashInRename", &ElasticTrace::removeRegDepMapEntry)); listeners.push_back(new ProbeListenerArg(this, "Squash", + O3DynInstConstPtr>(this, "Squash", &ElasticTrace::addSquashedInst)); listeners.push_back(new ProbeListenerArg(this, "Commit", + O3DynInstConstPtr>(this, "Commit", &ElasticTrace::addCommittedInst)); allProbesReg = true; } @@ -166,7 +167,7 @@ ElasticTrace::fetchReqTrace(const RequestPtr &req) } void -ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst) +ElasticTrace::recordExecTick(const O3DynInstConstPtr& dyn_inst) { // In a corner case, a retired instruction is propagated backward to the @@ -203,7 +204,7 @@ ElasticTrace::recordExecTick(const DynInstConstPtr& dyn_inst) } void -ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst) +ElasticTrace::recordToCommTick(const O3DynInstConstPtr& dyn_inst) { // If tracing has just been enabled then the instruction at this stage of // execution is far enough that we cannot gather info about its past like @@ -224,7 +225,7 @@ ElasticTrace::recordToCommTick(const DynInstConstPtr& dyn_inst) } void -ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst) +ElasticTrace::updateRegDep(const O3DynInstConstPtr& dyn_inst) { // Get the sequence number of the instruction InstSeqNum seq_num = dyn_inst->seqNum; @@ -303,7 +304,7 @@ ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair) } void -ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst) +ElasticTrace::addSquashedInst(const O3DynInstConstPtr& head_inst) { // If the squashed instruction was squashed before being processed by // execute stage then it will not be in the temporary store. 
In this case @@ -331,7 +332,7 @@ ElasticTrace::addSquashedInst(const DynInstConstPtr& head_inst) } void -ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst) +ElasticTrace::addCommittedInst(const O3DynInstConstPtr& head_inst) { DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n", head_inst->seqNum); @@ -390,7 +391,7 @@ ElasticTrace::addCommittedInst(const DynInstConstPtr& head_inst) } void -ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst, +ElasticTrace::addDepTraceRecord(const O3DynInstConstPtr& head_inst, InstExecInfo* exec_info_ptr, bool commit) { // Create a record to assign dynamic intruction related fields. @@ -652,7 +653,7 @@ ElasticTrace::hasCompCompleted(TraceInfo* past_record, } void -ElasticTrace::clearTempStoreUntil(const DynInstConstPtr& head_inst) +ElasticTrace::clearTempStoreUntil(const O3DynInstConstPtr& head_inst) { // Clear from temp store starting with the execution info object // corresponding the head_inst and continue clearing by decrementing the diff --git a/src/cpu/o3/probe/elastic_trace.hh b/src/cpu/o3/probe/elastic_trace.hh index fb802d5fd0..70f2763b8e 100644 --- a/src/cpu/o3/probe/elastic_trace.hh +++ b/src/cpu/o3/probe/elastic_trace.hh @@ -50,7 +50,7 @@ #include #include -#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/impl.hh" #include "mem/request.hh" #include "params/ElasticTrace.hh" @@ -85,8 +85,6 @@ class ElasticTrace : public ProbeListenerObject { public: - typedef typename O3CPUImpl::DynInstPtr DynInstPtr; - typedef typename O3CPUImpl::DynInstConstPtr DynInstConstPtr; typedef typename std::pair SeqNumRegPair; /** Trace record types corresponding to instruction node types */ @@ -129,7 +127,7 @@ class ElasticTrace : public ProbeListenerObject * * @param dyn_inst pointer to dynamic instruction in flight */ - void recordExecTick(const DynInstConstPtr& dyn_inst); + void recordExecTick(const O3DynInstConstPtr& dyn_inst); /** * Populate the timestamp field in an 
InstExecInfo object for an @@ -138,7 +136,7 @@ class ElasticTrace : public ProbeListenerObject * * @param dyn_inst pointer to dynamic instruction in flight */ - void recordToCommTick(const DynInstConstPtr& dyn_inst); + void recordToCommTick(const O3DynInstConstPtr& dyn_inst); /** * Record a Read After Write physical register dependency if there has @@ -149,7 +147,7 @@ class ElasticTrace : public ProbeListenerObject * * @param dyn_inst pointer to dynamic instruction in flight */ - void updateRegDep(const DynInstConstPtr& dyn_inst); + void updateRegDep(const O3DynInstConstPtr& dyn_inst); /** * When an instruction gets squashed the destination register mapped to it @@ -166,14 +164,14 @@ class ElasticTrace : public ProbeListenerObject * * @param head_inst pointer to dynamic instruction to be squashed */ - void addSquashedInst(const DynInstConstPtr& head_inst); + void addSquashedInst(const O3DynInstConstPtr& head_inst); /** * Add an instruction that is at the head of the ROB and is committed. * * @param head_inst pointer to dynamic instruction to be committed */ - void addCommittedInst(const DynInstConstPtr& head_inst); + void addCommittedInst(const O3DynInstConstPtr& head_inst); /** Event to trigger registering this listener for all probe points. 
*/ EventFunctionWrapper regEtraceListenersEvent; @@ -379,7 +377,7 @@ class ElasticTrace : public ProbeListenerObject * @param exec_info_ptr Pointer to InstExecInfo for that instruction * @param commit True if instruction is committed, false if squashed */ - void addDepTraceRecord(const DynInstConstPtr& head_inst, + void addDepTraceRecord(const O3DynInstConstPtr& head_inst, InstExecInfo* exec_info_ptr, bool commit); /** @@ -388,7 +386,7 @@ class ElasticTrace : public ProbeListenerObject * * @param head_inst pointer to dynamic instruction */ - void clearTempStoreUntil(const DynInstConstPtr& head_inst); + void clearTempStoreUntil(const O3DynInstConstPtr& head_inst); /** * Calculate the computational delay between an instruction and a diff --git a/src/cpu/o3/probe/simple_trace.cc b/src/cpu/o3/probe/simple_trace.cc index cc4cceaa67..fc2282e32d 100644 --- a/src/cpu/o3/probe/simple_trace.cc +++ b/src/cpu/o3/probe/simple_trace.cc @@ -38,16 +38,17 @@ #include "cpu/o3/probe/simple_trace.hh" #include "base/trace.hh" +#include "cpu/o3/dyn_inst.hh" #include "debug/SimpleTrace.hh" -void SimpleTrace::traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst) +void SimpleTrace::traceCommit(const O3DynInstConstPtr& dynInst) { DPRINTFR(SimpleTrace, "[%s]: Commit 0x%08x %s.\n", name(), dynInst->instAddr(), dynInst->staticInst->disassemble(dynInst->instAddr())); } -void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst) +void SimpleTrace::traceFetch(const O3DynInstConstPtr& dynInst) { DPRINTFR(SimpleTrace, "[%s]: Fetch 0x%08x %s.\n", name(), dynInst->instAddr(), @@ -57,7 +58,7 @@ void SimpleTrace::traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst) void SimpleTrace::regProbeListeners() { typedef ProbeListenerArg DynInstListener; + O3DynInstConstPtr> DynInstListener; listeners.push_back(new DynInstListener(this, "Commit", &SimpleTrace::traceCommit)); listeners.push_back(new DynInstListener(this, "Fetch", diff --git a/src/cpu/o3/probe/simple_trace.hh 
b/src/cpu/o3/probe/simple_trace.hh index e73779a981..abcce0f24b 100644 --- a/src/cpu/o3/probe/simple_trace.hh +++ b/src/cpu/o3/probe/simple_trace.hh @@ -44,7 +44,7 @@ #ifndef __CPU_O3_PROBE_SIMPLE_TRACE_HH__ #define __CPU_O3_PROBE_SIMPLE_TRACE_HH__ -#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/impl.hh" #include "params/SimpleTrace.hh" #include "sim/probe/probe.hh" @@ -69,8 +69,8 @@ class SimpleTrace : public ProbeListenerObject } private: - void traceFetch(const O3CPUImpl::DynInstConstPtr& dynInst); - void traceCommit(const O3CPUImpl::DynInstConstPtr& dynInst); + void traceFetch(const O3DynInstConstPtr& dynInst); + void traceCommit(const O3DynInstConstPtr& dynInst); }; #endif//__CPU_O3_PROBE_SIMPLE_TRACE_HH__ diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh index eac8e30a3b..2c4796a0bd 100644 --- a/src/cpu/o3/rename.hh +++ b/src/cpu/o3/rename.hh @@ -48,6 +48,7 @@ #include "base/statistics.hh" #include "config/the_isa.hh" #include "cpu/o3/commit.hh" +#include "cpu/o3/dyn_inst_ptr.hh" #include "cpu/o3/free_list.hh" #include "cpu/o3/iew.hh" #include "cpu/o3/limits.hh" @@ -73,7 +74,6 @@ class DefaultRename { public: // Typedefs from the Impl. - typedef typename Impl::DynInstPtr DynInstPtr; typedef typename Impl::O3CPU O3CPU; typedef typename Impl::DecodeStruct DecodeStruct; typedef typename Impl::RenameStruct RenameStruct; @@ -83,7 +83,7 @@ class DefaultRename // be added to the front of the queue, which is the only reason for // using a deque instead of a queue. (Most other stages use a // queue) - typedef std::deque InstQueue; + typedef std::deque InstQueue; public: /** Overall rename status. Used to determine if the CPU can @@ -117,7 +117,7 @@ class DefaultRename /** Probe points. 
*/ typedef typename std::pair SeqNumRegPair; /** To probe when register renaming for an instruction is complete */ - ProbePointArg *ppRename; + ProbePointArg *ppRename; /** * To probe when an instruction is squashed and the register mapping * for it needs to be undone @@ -248,22 +248,22 @@ class DefaultRename void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid); /** Renames the source registers of an instruction. */ - inline void renameSrcRegs(const DynInstPtr &inst, ThreadID tid); + void renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid); /** Renames the destination registers of an instruction. */ - inline void renameDestRegs(const DynInstPtr &inst, ThreadID tid); + void renameDestRegs(const O3DynInstPtr &inst, ThreadID tid); /** Calculates the number of free ROB entries for a specific thread. */ - inline int calcFreeROBEntries(ThreadID tid); + int calcFreeROBEntries(ThreadID tid); /** Calculates the number of free IQ entries for a specific thread. */ - inline int calcFreeIQEntries(ThreadID tid); + int calcFreeIQEntries(ThreadID tid); /** Calculates the number of free LQ entries for a specific thread. */ - inline int calcFreeLQEntries(ThreadID tid); + int calcFreeLQEntries(ThreadID tid); /** Calculates the number of free SQ entries for a specific thread. */ - inline int calcFreeSQEntries(ThreadID tid); + int calcFreeSQEntries(ThreadID tid); /** Returns the number of valid instructions coming from decode. */ unsigned validInsts(); @@ -417,7 +417,7 @@ class DefaultRename Stalls stalls[O3MaxThreads]; /** The serialize instruction that rename has stalled on. */ - DynInstPtr serializeInst[O3MaxThreads]; + O3DynInstPtr serializeInst[O3MaxThreads]; /** Records if rename needs to serialize on the next instruction for any * thread. 
diff --git a/src/cpu/o3/rename_impl.hh b/src/cpu/o3/rename_impl.hh index f48a89257c..bc33c5d83d 100644 --- a/src/cpu/o3/rename_impl.hh +++ b/src/cpu/o3/rename_impl.hh @@ -177,7 +177,8 @@ template void DefaultRename::regProbePoints() { - ppRename = new ProbePointArg(cpu->getProbeManager(), "Rename"); + ppRename = new ProbePointArg( + cpu->getProbeManager(), "Rename"); ppSquashInRename = new ProbePointArg(cpu->getProbeManager(), "SquashInRename"); } @@ -612,11 +613,12 @@ DefaultRename::renameInsts(ThreadID tid) assert(!insts_to_rename.empty()); - DynInstPtr inst = insts_to_rename.front(); + O3DynInstPtr inst = insts_to_rename.front(); - //For all kind of instructions, check ROB and IQ first - //For load instruction, check LQ size and take into account the inflight loads - //For store instruction, check SQ size and take into account the inflight stores + //For all kind of instructions, check ROB and IQ first For load + //instruction, check LQ size and take into account the inflight loads + //For store instruction, check SQ size and take into account the + //inflight stores if (inst->isLoad()) { if (calcFreeLQEntries(tid) <= 0) { @@ -774,7 +776,7 @@ template void DefaultRename::skidInsert(ThreadID tid) { - DynInstPtr inst = NULL; + O3DynInstPtr inst = NULL; while (!insts[tid].empty()) { inst = insts[tid].front(); @@ -811,7 +813,7 @@ DefaultRename::sortInsts() { int insts_from_decode = fromDecode->size; for (int i = 0; i < insts_from_decode; ++i) { - const DynInstPtr &inst = fromDecode->insts[i]; + const O3DynInstPtr &inst = fromDecode->insts[i]; insts[inst->threadNumber].push_back(inst); #if TRACING_ON if (Debug::O3PipeView) { @@ -1035,7 +1037,7 @@ DefaultRename::removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid) template inline void -DefaultRename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) +DefaultRename::renameSrcRegs(const O3DynInstPtr &inst, ThreadID tid) { ThreadContext *tc = inst->tcBase(); UnifiedRenameMap *map = renameMap[tid]; @@ -1102,7 
+1104,7 @@ DefaultRename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid) template inline void -DefaultRename::renameDestRegs(const DynInstPtr &inst, ThreadID tid) +DefaultRename::renameDestRegs(const O3DynInstPtr &inst, ThreadID tid) { ThreadContext *tc = inst->tcBase(); UnifiedRenameMap *map = renameMap[tid]; @@ -1369,7 +1371,7 @@ DefaultRename::checkSignalsAndUpdate(ThreadID tid) DPRINTF(Rename, "[tid:%i] Done with serialize stall, switching to " "unblocking.\n", tid); - DynInstPtr serial_inst = serializeInst[tid]; + O3DynInstPtr serial_inst = serializeInst[tid]; renameStatus[tid] = Unblocking; diff --git a/src/cpu/o3/rob.hh b/src/cpu/o3/rob.hh index ba5e027176..1259b53f8d 100644 --- a/src/cpu/o3/rob.hh +++ b/src/cpu/o3/rob.hh @@ -60,10 +60,9 @@ class ROB public: //Typedefs from the Impl. typedef typename Impl::O3CPU O3CPU; - typedef typename Impl::DynInstPtr DynInstPtr; typedef std::pair UnmapInfo; - typedef typename std::list::iterator InstIt; + typedef typename std::list::iterator InstIt; /** Possible ROB statuses. */ enum Status @@ -105,36 +104,36 @@ class ROB * ROB for the new instruction. * @param inst The instruction being inserted into the ROB. */ - void insertInst(const DynInstPtr &inst); + void insertInst(const O3DynInstPtr &inst); /** Returns pointer to the head instruction within the ROB. There is * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the head of the ROB. */ -// DynInstPtr readHeadInst(); +// O3DynInstPtr readHeadInst(); /** Returns a pointer to the head instruction of a specific thread within * the ROB. * @return Pointer to the DynInst that is at the head of the ROB. */ - const DynInstPtr &readHeadInst(ThreadID tid); + const O3DynInstPtr &readHeadInst(ThreadID tid); /** Returns a pointer to the instruction with the given sequence if it is * in the ROB. 
*/ - DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst); + O3DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst); /** Returns pointer to the tail instruction within the ROB. There is * no guarantee as to the return value if the ROB is empty. * @retval Pointer to the DynInst that is at the tail of the ROB. */ -// DynInstPtr readTailInst(); +// O3DynInstPtr readTailInst(); /** Returns a pointer to the tail instruction of a specific thread within * the ROB. * @return Pointer to the DynInst that is at the tail of the ROB. */ - DynInstPtr readTailInst(ThreadID tid); + O3DynInstPtr readTailInst(ThreadID tid); /** Retires the head instruction, removing it from the ROB. */ // void retireHead(); @@ -277,7 +276,7 @@ class ROB unsigned maxEntries[O3MaxThreads]; /** ROB List of Instructions */ - std::list instList[O3MaxThreads]; + std::list instList[O3MaxThreads]; /** Number of instructions that can be squashed in a single cycle. */ unsigned squashWidth; @@ -308,7 +307,7 @@ class ROB int numInstsInROB; /** Dummy instruction returned if there are no insts left. */ - DynInstPtr dummyInst; + O3DynInstPtr dummyInst; private: /** The sequence number of the squashed instruction. 
*/ diff --git a/src/cpu/o3/rob_impl.hh b/src/cpu/o3/rob_impl.hh index 0f192b789d..6bdf23a790 100644 --- a/src/cpu/o3/rob_impl.hh +++ b/src/cpu/o3/rob_impl.hh @@ -200,7 +200,7 @@ ROB::countInsts(ThreadID tid) template void -ROB::insertInst(const DynInstPtr &inst) +ROB::insertInst(const O3DynInstPtr &inst) { assert(inst); @@ -246,7 +246,7 @@ ROB::retireHead(ThreadID tid) // Get the head ROB instruction by copying it and remove it from the list InstIt head_it = instList[tid].begin(); - DynInstPtr head_inst = std::move(*head_it); + O3DynInstPtr head_inst = std::move(*head_it); instList[tid].erase(head_it); assert(head_inst->readyToCommit()); @@ -428,7 +428,7 @@ ROB::updateHead() InstIt head_thread = instList[tid].begin(); - DynInstPtr head_inst = (*head_thread); + O3DynInstPtr head_inst = (*head_thread); assert(head_inst != 0); @@ -513,7 +513,7 @@ ROB::squash(InstSeqNum squash_num, ThreadID tid) } template -const typename Impl::DynInstPtr& +const O3DynInstPtr& ROB::readHeadInst(ThreadID tid) { if (threadEntries[tid] != 0) { @@ -528,7 +528,7 @@ ROB::readHeadInst(ThreadID tid) } template -typename Impl::DynInstPtr +O3DynInstPtr ROB::readTailInst(ThreadID tid) { InstIt tail_thread = instList[tid].end(); @@ -546,7 +546,7 @@ ROB::ROBStats::ROBStats(Stats::Group *parent) } template -typename Impl::DynInstPtr +O3DynInstPtr ROB::findInst(ThreadID tid, InstSeqNum squash_inst) { for (InstIt it = instList[tid].begin(); it != instList[tid].end(); it++) {