From 5387e6711433b7b0d18b629f5b36cdb44355a84c Mon Sep 17 00:00:00 2001 From: David Schall Date: Tue, 5 Sep 2023 08:01:13 +0000 Subject: [PATCH] cpu: Restructure RAS The return address stack (RAS) is restructured to be a separate SimObject. This allows the RAS to be disabled and separates its functionality more cleanly. It also makes statistics and debugging easier. Change-Id: I8aacf7d4c8e308165d0e7e15bc5a5d0df77f8192 Signed-off-by: David Schall --- configs/common/cores/arm/HPI.py | 2 +- configs/common/cores/arm/O3_ARM_v7a.py | 2 +- configs/common/cores/arm/ex5_big.py | 2 +- src/cpu/pred/BranchPredictor.py | 15 +- src/cpu/pred/SConscript | 2 + src/cpu/pred/bpred_unit.cc | 169 +++++------- src/cpu/pred/bpred_unit.hh | 29 +- src/cpu/pred/ras.cc | 256 +++++++++++++++++- src/cpu/pred/ras.hh | 201 +++++++++++--- .../riscvmatched/riscvmatched_core.py | 2 +- 10 files changed, 508 insertions(+), 172 deletions(-) diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py index f7e9348622..8fe396abfa 100644 --- a/configs/common/cores/arm/HPI.py +++ b/configs/common/cores/arm/HPI.py @@ -1687,6 +1687,7 @@ class HPI_BTB(SimpleBTB): class HPI_BP(TournamentBP): btb = HPI_BTB() + ras = ReturnAddrStack(numEntries=8) localPredictorSize = 64 localCtrBits = 2 localHistoryTableSize = 64 @@ -1694,7 +1695,6 @@ class HPI_BP(TournamentBP): globalCtrBits = 2 choicePredictorSize = 1024 choiceCtrBits = 2 - RASSize = 8 instShiftAmt = 2 diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py index e8a7826372..de258324be 100644 --- a/configs/common/cores/arm/O3_ARM_v7a.py +++ b/configs/common/cores/arm/O3_ARM_v7a.py @@ -116,11 +116,11 @@ class O3_ARM_v7a_BTB(SimpleBTB): # Bi-Mode Branch Predictor class O3_ARM_v7a_BP(BiModeBP): btb = O3_ARM_v7a_BTB() + ras = ReturnAddrStack(numEntries=16) globalPredictorSize = 8192 globalCtrBits = 2 choicePredictorSize = 8192 choiceCtrBits = 2 - RASSize = 16 instShiftAmt = 2 diff --git
a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py index daf5102e28..7803c1e0cc 100644 --- a/configs/common/cores/arm/ex5_big.py +++ b/configs/common/cores/arm/ex5_big.py @@ -113,11 +113,11 @@ class ex5_big_BTB(SimpleBTB): # Bi-Mode Branch Predictor class ex5_big_BP(BiModeBP): btb = ex5_big_BTB() + ras = ReturnAddrStack(numEntries=48) globalPredictorSize = 4096 globalCtrBits = 2 choicePredictorSize = 1024 choiceCtrBits = 3 - RASSize = 48 instShiftAmt = 2 diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index ed356b166d..5ae4ee8963 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -57,6 +57,15 @@ class BranchType(Enum): ] +class ReturnAddrStack(SimObject): + type = "ReturnAddrStack" + cxx_class = "gem5::branch_prediction::ReturnAddrStack" + cxx_header = "cpu/pred/ras.hh" + + numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") + numEntries = Param.Unsigned(16, "Number of RAS entries") + + class BranchTargetBuffer(ClockedObject): type = "BranchTargetBuffer" cxx_class = "gem5::branch_prediction::BranchTargetBuffer" @@ -120,10 +129,10 @@ class BranchPredictor(SimObject): numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") - RASSize = Param.Unsigned(16, "RAS size") - btb = Param.BranchTargetBuffer(SimpleBTB(), "Branch target buffer (BTB)") - + ras = Param.ReturnAddrStack( + ReturnAddrStack(), "Return address stack, set to NULL to disable RAS." 
+ ) indirectBranchPred = Param.IndirectPredictor( SimpleIndirectPredictor(), "Indirect branch predictor, set to NULL to disable " diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index 89054ba8e8..c6c2a94cc8 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -46,6 +46,7 @@ SimObject('BranchPredictor.py', 'BranchPredictor', 'IndirectPredictor', 'SimpleIndirectPredictor', 'BranchTargetBuffer', 'SimpleBTB', + 'ReturnAddrStack', 'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor', 'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB', 'LTAGE', 'TAGE_SC_L_LoopPredictor', 'StatisticalCorrector', 'TAGE_SC_L', @@ -85,6 +86,7 @@ Source('btb.cc') Source('simple_btb.cc') DebugFlag('Indirect') DebugFlag('BTB') +DebugFlag('RAS') DebugFlag('FreeList') DebugFlag('Branch') DebugFlag('Tage') diff --git a/src/cpu/pred/bpred_unit.cc b/src/cpu/pred/bpred_unit.cc index 813ac86f24..a338cbdafd 100644 --- a/src/cpu/pred/bpred_unit.cc +++ b/src/cpu/pred/bpred_unit.cc @@ -60,13 +60,11 @@ BPredUnit::BPredUnit(const Params &params) numThreads(params.numThreads), predHist(numThreads), btb(params.btb), - RAS(numThreads), + ras(params.ras), iPred(params.indirectBranchPred), stats(this), instShiftAmt(params.instShiftAmt) { - for (auto& r : RAS) - r.init(params.RASSize); } BPredUnit::BPredUnitStats::BPredUnitStats(statistics::Group *parent) @@ -172,19 +170,12 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, // support coroutines. if (inst->isReturn()) { ++stats.RASUsed; - predict_record.wasReturn = true; - // If it's a function return call, then look up the address - // in the RAS. - const PCStateBase *ras_top = RAS[tid].top(); - if (ras_top) - set(target, inst->buildRetPC(pc, *ras_top)); - - // Record the top entry of the RAS, and its index.
- predict_record.usedRAS = true; - predict_record.RASIndex = RAS[tid].topIdx(); - set(predict_record.RASTarget, ras_top); - - RAS[tid].pop(); + // If it's a return from a function call, then look up the + // RETURN address in the RAS. + const PCStateBase *return_addr = ras->pop(tid, + predict_record.rasHistory); + if (return_addr) + set(target, return_addr); DPRINTF(Branch, "[tid:%i] [sn:%llu] Instruction %s is a return, " "RAS predicted target: %s, RAS index: %i\n", @@ -192,17 +183,17 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, } if (inst->isCall()) { - RAS[tid].push(pc); - predict_record.pushedRAS = true; + // In case of a call build the return address and + // push it to the RAS. + auto return_addr = inst->buildRetPC(pc, pc); + ras->push(tid, *return_addr, predict_record.rasHistory); // Record that it was a call so that the top RAS entry can // be popped off if the speculation is incorrect. - predict_record.wasCall = true; + DPRINTF(Branch, "[tid:%i] [sn:%llu] Instr. %s was " + "a call, push return address %s onto the RAS\n", + tid, seqNum, pc, *return_addr); - DPRINTF(Branch, - "[tid:%i] [sn:%llu] Instruction %s was a call, adding " - "%s to the RAS index: %i\n", - tid, seqNum, pc, pc, RAS[tid].topIdx()); } // The target address is not predicted by RAS. 
@@ -237,7 +228,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, "called for %s\n", tid, seqNum, pc); } else if (inst->isCall() && !inst->isUncondCtrl()) { - RAS[tid].pop(); + ras->squash(tid, predict_record.rasHistory); predict_record.pushedRAS = false; } inst->advancePC(*target); @@ -269,17 +260,13 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum, if (!inst->isCall() && !inst->isReturn()) { } else if (inst->isCall() && !inst->isUncondCtrl()) { - RAS[tid].pop(); - predict_record.pushedRAS = false; + ras->squash(tid, predict_record.rasHistory); } inst->advancePC(*target); } } } } else { - if (inst->isReturn()) { - predict_record.wasReturn = true; - } inst->advancePC(*target); } predict_record.target = target->instAddr(); @@ -325,6 +312,12 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid) predHist[tid].back().indirectHistory); } + if (ras) { + ras->commit(tid, predHist[tid].back().mispredict, + getBranchType(predHist[tid].back().inst), + predHist[tid].back().rasHistory); + } + predHist[tid].pop_back(); } } @@ -336,30 +329,15 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid) while (!pred_hist.empty() && pred_hist.front().seqNum > squashed_sn) { - if (pred_hist.front().wasCall && pred_hist.front().pushedRAS) { - // Was a call but predicated false. 
Pop RAS here - DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Squashing" - " Call [sn:%llu] PC: %s Popping RAS\n", tid, squashed_sn, - pred_hist.front().seqNum, pred_hist.front().pc); - RAS[tid].pop(); - } - if (pred_hist.front().usedRAS) { - if (pred_hist.front().RASTarget != nullptr) { - DPRINTF(Branch, "[tid:%i] [squash sn:%llu]" - " Restoring top of RAS to: %i," - " target: %s\n", tid, squashed_sn, - pred_hist.front().RASIndex, - *pred_hist.front().RASTarget); - } - else { - DPRINTF(Branch, "[tid:%i] [squash sn:%llu]" - " Restoring top of RAS to: %i," - " target: INVALID_TARGET\n", tid, squashed_sn, - pred_hist.front().RASIndex); - } - RAS[tid].restore(pred_hist.front().RASIndex, - pred_hist.front().RASTarget.get()); + if (pred_hist.front().rasHistory) { + assert(ras); + + DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Incorrect call/return " + "PC %#x. Fix RAS.\n", tid, pred_hist.front().seqNum, + pred_hist.front().pc); + + ras->squash(tid, pred_hist.front().rasHistory); } // This call should delete the bpHistory. @@ -425,8 +403,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, assert(pred_hist.front().seqNum == squashed_sn); } - - if ((*hist_it).usedRAS) { + if ((*hist_it).rasHistory) { ++stats.RASIncorrect; DPRINTF(Branch, "[tid:%i] [squash sn:%llu] Incorrect RAS [sn:%llu]\n", @@ -445,6 +422,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, // Remember the correct direction for the update at commit. 
pred_hist.front().predTaken = actually_taken; pred_hist.front().target = corr_target.instAddr(); + pred_hist.front().mispredict = true; update(tid, (*hist_it).pc, actually_taken, pred_hist.front().bpHistory, true, pred_hist.front().inst, @@ -459,16 +437,45 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, } - if (actually_taken) { - if (hist_it->wasReturn && !hist_it->usedRAS) { - DPRINTF(Branch, "[tid:%i] [squash sn:%llu] " - "Incorrectly predicted " - "return [sn:%llu] PC: %#x\n", tid, squashed_sn, - hist_it->seqNum, - hist_it->pc); - RAS[tid].pop(); - hist_it->usedRAS = true; + // Correct RAS --------------------------------- + if (ras) { + // The branch was taken and the RAS was not updated. + // In case of call or return that needs to be fixed. + if (actually_taken && (hist_it->rasHistory == nullptr)) { + + // A return has not poped the RAS. + if (hist_it->inst->isReturn()) { + DPRINTF(Branch, "[tid:%i] [squash sn:%llu] " + "Incorrectly predicted return [sn:%llu] PC: %#x\n", + tid, squashed_sn, hist_it->seqNum, hist_it->pc); + + ras->pop(tid, hist_it->rasHistory); + } + + // A call has not pushed a return address to the RAS. + if (hist_it->inst->isCall()) { + // In case of a call build the return address and + // push it to the RAS. + auto return_addr = hist_it->inst->buildRetPC( + corr_target, corr_target); + + DPRINTF(Branch, "[tid:%i] [squash sn:%llu] " + "Incorrectly predicted call: [sn:%llu,PC:%#x] " + " Push return address %s onto RAS\n", tid, + squashed_sn, hist_it->seqNum, hist_it->pc, + *return_addr); + ras->push(tid, *return_addr, hist_it->rasHistory); + } + + // The branch was not taken but the RAS modified. + } else if (!actually_taken && (hist_it->rasHistory != nullptr)) { + // The branch was not taken but the RAS was modified. + // Needs to be fixed. 
+ ras->squash(tid, hist_it->rasHistory); } + } + + if (actually_taken) { if (hist_it->wasIndirect) { ++stats.indirectMispredicted; } else { @@ -481,42 +488,6 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, btb->update(tid, hist_it->pc, corr_target, getBranchType(hist_it->inst)); } - } else { - //Actually not Taken - if (hist_it->wasCall && hist_it->pushedRAS) { - //Was a Call but predicated false. Pop RAS here - DPRINTF(Branch, - "[tid:%i] [squash sn:%llu] " - "Incorrectly predicted " - "Call [sn:%llu] PC: %s Popping RAS\n", - tid, squashed_sn, - hist_it->seqNum, hist_it->pc); - RAS[tid].pop(); - hist_it->pushedRAS = false; - } - if (hist_it->usedRAS) { - - std::string RASTarget; - - DPRINTF(Branch, - "[tid:%i] [squash sn:%llu] Incorrectly predicted " - "return [sn:%llu] PC: %#x Restoring RAS\n", tid, - squashed_sn, - hist_it->seqNum, hist_it->pc); - if (hist_it->RASTarget) { - std::ostringstream os; - os << *hist_it->RASTarget.get(); - RASTarget = os.str(); - } else { - RASTarget = "no RAS"; - } - DPRINTF(Branch, - "[tid:%i] [squash sn:%llu] Restoring top of RAS " - "to: %i, target: %s\n", tid, squashed_sn, - hist_it->RASIndex, RASTarget.c_str()); - RAS[tid].restore(hist_it->RASIndex, hist_it->RASTarget.get()); - hist_it->usedRAS = false; - } } } else { DPRINTF(Branch, "[tid:%i] [sn:%llu] pred_hist empty, can't " diff --git a/src/cpu/pred/bpred_unit.hh b/src/cpu/pred/bpred_unit.hh index 1b10d44a7c..18f3f562cb 100644 --- a/src/cpu/pred/bpred_unit.hh +++ b/src/cpu/pred/bpred_unit.hh @@ -218,17 +218,19 @@ class BPredUnit : public SimObject void *indirect_history, ThreadID _tid, const StaticInstPtr & inst) : seqNum(seq_num), pc(instPC), bpHistory(bp_history), - indirectHistory(indirect_history), tid(_tid), + indirectHistory(indirect_history), rasHistory(nullptr), + tid(_tid), predTaken(pred_taken), inst(inst) {} PredictorHistory(const PredictorHistory &other) : seqNum(other.seqNum), pc(other.pc), bpHistory(other.bpHistory), - 
indirectHistory(other.indirectHistory), RASIndex(other.RASIndex), + indirectHistory(other.indirectHistory), + rasHistory(other.rasHistory), RASIndex(other.RASIndex), tid(other.tid), predTaken(other.predTaken), usedRAS(other.usedRAS), - pushedRAS(other.pushedRAS), wasCall(other.wasCall), - wasReturn(other.wasReturn), wasIndirect(other.wasIndirect), - target(other.target), inst(other.inst) + pushedRAS(other.pushedRAS), wasIndirect(other.wasIndirect), + target(other.target), inst(other.inst), + mispredict(other.mispredict) { set(RASTarget, other.RASTarget); } @@ -253,6 +255,8 @@ class BPredUnit : public SimObject void *indirectHistory = nullptr; + void *rasHistory = nullptr; + /** The RAS target (only valid if a return). */ std::unique_ptr RASTarget; @@ -271,12 +275,6 @@ class BPredUnit : public SimObject /* Whether or not the RAS was pushed */ bool pushedRAS = false; - /** Whether or not the instruction was a call. */ - bool wasCall = false; - - /** Whether or not the instruction was a return. */ - bool wasReturn = false; - /** Wether this instruction was an indirect branch */ bool wasIndirect = false; @@ -287,6 +285,9 @@ class BPredUnit : public SimObject /** The branch instrction */ const StaticInstPtr inst; + + /** Whether this branch was mispredicted */ + bool mispredict = false; }; typedef std::deque History; @@ -303,10 +304,10 @@ class BPredUnit : public SimObject std::vector predHist; /** The BTB. */ - BranchTargetBuffer* btb; + BranchTargetBuffer * btb; - /** The per-thread return address stack. */ - std::vector RAS; + /** The return address stack. */ + ReturnAddrStack * ras; /** The indirect target predictor. 
*/ IndirectPredictor * iPred; diff --git a/src/cpu/pred/ras.cc b/src/cpu/pred/ras.cc index 8d415b7fbd..f29b265657 100644 --- a/src/cpu/pred/ras.cc +++ b/src/cpu/pred/ras.cc @@ -1,4 +1,16 @@ /* + * Copyright (c) 2022-2023 The University of Edinburgh + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * All rights reserved. * @@ -28,30 +40,46 @@ #include "cpu/pred/ras.hh" +#include + +#include "debug/RAS.hh" + namespace gem5 { namespace branch_prediction { + void -ReturnAddrStack::init(unsigned _numEntries) +ReturnAddrStack::AddrStack::init(unsigned _numEntries) { numEntries = _numEntries; addrStack.resize(numEntries); + for (unsigned i = 0; i < numEntries; ++i) { + addrStack[i] = nullptr; + } reset(); } void -ReturnAddrStack::reset() +ReturnAddrStack::AddrStack::reset() { usedEntries = 0; tos = 0; } -void -ReturnAddrStack::push(const PCStateBase &return_addr) +const PCStateBase * +ReturnAddrStack::AddrStack::top() { + return addrStack[tos].get(); +} + + +void +ReturnAddrStack::AddrStack::push(const PCStateBase &return_addr) +{ + incrTos(); set(addrStack[tos], return_addr); @@ -62,7 +90,7 @@ ReturnAddrStack::push(const PCStateBase &return_addr) } void -ReturnAddrStack::pop() +ReturnAddrStack::AddrStack::pop() { if (usedEntries > 0) { --usedEntries; @@ -72,9 +100,10 @@ ReturnAddrStack::pop() } void -ReturnAddrStack::restore(unsigned top_entry_idx, const PCStateBase 
*restored) +ReturnAddrStack::AddrStack::restore(unsigned _tos, + const PCStateBase *restored) { - tos = top_entry_idx; + tos = _tos; set(addrStack[tos], restored); @@ -83,5 +112,218 @@ ReturnAddrStack::restore(unsigned top_entry_idx, const PCStateBase *restored) } } +std::string +ReturnAddrStack::AddrStack::toString(int n) +{ + std::stringstream ss; + for (int i = 0; i < n; i++) { + int idx = int(tos)-i; + if (idx < 0 || addrStack[idx] == nullptr) { + break; + } + ss << std::dec << idx << ":0x" << std::setfill('0') << std::setw(16) + << std::hex << addrStack[idx]->instAddr() << ";"; + } + return ss.str(); +} + + +// Return address stack class. +// + +ReturnAddrStack::ReturnAddrStack(const Params &p) + : SimObject(p), + numEntries(p.numEntries), + numThreads(p.numThreads), + stats(this) +{ + DPRINTF(RAS, "Create RAS stacks.\n"); + + for (unsigned i = 0; i < numThreads; ++i) { + addrStacks.emplace_back(*this); + addrStacks[i].init(numEntries); + } +} + +void +ReturnAddrStack::reset() +{ + DPRINTF(RAS, "RAS Reset.\n"); + for (auto& r : addrStacks) + r.reset(); +} + +void +ReturnAddrStack::makeRASHistory(void* &ras_history) +{ + RASHistory* history = new RASHistory; + history->pushed = false; + history->poped = false; + ras_history = static_cast(history); +} + +void +ReturnAddrStack::push(ThreadID tid, const PCStateBase &pc, + void * &ras_history) +{ + // Note: The RAS may be both popped and pushed to + // support coroutines. + if (ras_history == nullptr) { + makeRASHistory(ras_history); + } + RASHistory *history = static_cast(ras_history); + stats.pushes++; + history->pushed = true; + + addrStacks[tid].push(pc); + + DPRINTF(RAS, "%s: RAS[%i] <= %#x. 
Entries used: %i, tid:%i\n", __func__, + addrStacks[tid].tos, pc.instAddr(), + addrStacks[tid].usedEntries,tid); + // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10)); +} + + +const PCStateBase* +ReturnAddrStack::pop(ThreadID tid, void * &ras_history) +{ + // Note: The RAS may be both popped and pushed to + // support coroutines. + if (ras_history == nullptr) { + makeRASHistory(ras_history); + } + RASHistory *history = static_cast(ras_history); + stats.pops++; + + history->poped = true; + history->tos = addrStacks[tid].tos; + + + set(history->ras_entry, addrStacks[tid].top()); + // Pop the top of stack + addrStacks[tid].pop(); + + DPRINTF(RAS, "%s: RAS[%i] => %#x. Entries used: %i, tid:%i\n", __func__, + addrStacks[tid].tos, (history->ras_entry.get() != nullptr) + ? history->ras_entry->instAddr() : 0, + addrStacks[tid].usedEntries, tid); + // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10)); + + return history->ras_entry.get(); +} + +void +ReturnAddrStack::squash(ThreadID tid, void * &ras_history) +{ + if (ras_history == nullptr) { + // If ras_history is null no stack operation was performed for + // this branch. Nothing to be done. + return; + } + stats.squashes++; + + RASHistory *history = static_cast(ras_history); + + if (history->pushed) { + stats.pops++; + addrStacks[tid].pop(); + + DPRINTF(RAS, "RAS::%s Incorrect push. Pop RAS[%i]. " + "Entries used: %i, tid:%i\n", __func__, + addrStacks[tid].tos, addrStacks[tid].usedEntries, tid); + } + + if (history->poped) { + stats.pushes++; + addrStacks[tid].restore(history->tos, history->ras_entry.get()); + DPRINTF(RAS, "RAS::%s Incorrect pop. Restore to: RAS[%i]:%#x. " + "Entries used: %i, tid:%i\n", __func__, + history->tos, (history->ras_entry.get() != nullptr) + ? 
history->ras_entry->instAddr() : 0, + addrStacks[tid].usedEntries, tid); + } + // DPRINTF(RAS, "[%s]\n", addrStacks[tid].toString(10)); + delete history; + ras_history = nullptr; +} + +void +ReturnAddrStack::commit(ThreadID tid, bool misp, + const BranchType brType, void * &ras_history) +{ + // Skip branches that are not call or returns + if (!(brType == BranchType::Return || + brType == BranchType::CallDirect || + brType == BranchType::CallIndirect)) { + // If its not a call or return there should be no ras history. + assert(ras_history == nullptr); + return; + } + + DPRINTF(RAS, "RAS::%s Commit Branch inst: %s, tid:%i\n", + __func__, toString(brType),tid); + + + if (ras_history == nullptr) { + /** + * The only case where we could have no history at this point is + * for a conditional call that is not taken. + * + * Conditional calls + * + * Conditional calls have different scenarios: + * 1. the call was predicted as non taken but was actually taken + * 2. the call was predicted taken but was actually not taken. + * 3. the call was taken but the target was incorrect. + * 4. the call was correct. + * + * In case of mispredictions they will be handled during squashing + * of the BPU. It will push and pop the RAS accordingly. + **/ + return; + } + + /* Handle all other commited returns and calls */ + RASHistory *history = static_cast(ras_history); + + if (history->poped) { + stats.used++; + if (misp) { + stats.incorrect++; + } else { + stats.correct++; + } + + DPRINTF(RAS, "RAS::%s Commit Return PC %#x, correct:%i, tid:%i\n", + __func__, !misp, (history->ras_entry.get() != nullptr) + ? 
history->ras_entry->instAddr() : 0, tid); + } + delete history; + ras_history = nullptr; +} + + + +ReturnAddrStack::ReturnAddrStackStats::ReturnAddrStackStats( + statistics::Group *parent) + : statistics::Group(parent), + ADD_STAT(pushes, statistics::units::Count::get(), + "Number of times a PC was pushed onto the RAS"), + ADD_STAT(pops, statistics::units::Count::get(), + "Number of times a PC was poped from the RAS"), + ADD_STAT(squashes, statistics::units::Count::get(), + "Number of times the stack operation was squashed due to " + "wrong speculation."), + ADD_STAT(used, statistics::units::Count::get(), + "Number of times the RAS is the provider"), + ADD_STAT(correct, statistics::units::Count::get(), + "Number of times the RAS is the provider and the " + "prediction is correct"), + ADD_STAT(incorrect, statistics::units::Count::get(), + "Number of times the RAS is the provider and the " + "prediction is wrong") +{ +} + } // namespace branch_prediction } // namespace gem5 diff --git a/src/cpu/pred/ras.hh b/src/cpu/pred/ras.hh index 0b4b471c03..294055965e 100644 --- a/src/cpu/pred/ras.hh +++ b/src/cpu/pred/ras.hh @@ -1,4 +1,16 @@ /* + * Copyright (c) 2022-2023 The University of Edinburgh + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2005 The Regents of The University of Michigan * All rights reserved. 
* @@ -32,7 +44,12 @@ #include #include "arch/generic/pcstate.hh" +#include "base/statistics.hh" #include "base/types.hh" +#include "cpu/pred/branch_type.hh" +#include "cpu/static_inst.hh" +#include "params/ReturnAddrStack.hh" +#include "sim/sim_object.hh" namespace gem5 { @@ -41,70 +58,164 @@ namespace branch_prediction { /** Return address stack class, implements a simple RAS. */ -class ReturnAddrStack +class ReturnAddrStack : public SimObject { public: - /** Creates a return address stack, but init() must be called prior to - * use. - */ - ReturnAddrStack() {} - /** Initializes RAS with a specified number of entries. - * @param numEntries Number of entries in the RAS. + /** Subclass that implements the actual address stack. ****** */ - void init(unsigned numEntries); + class AddrStack + { + public: + AddrStack(ReturnAddrStack &_parent) + : parent(_parent) + {} + + + /** Initializes RAS with a specified number of entries. + * @param numEntries Number of entries in the RAS. + */ + void init(unsigned numEntries); + + void reset(); + + /** Returns the top address on the RAS. */ + const PCStateBase *top(); + + /** Returns the index of the top of the RAS. */ + unsigned topIdx() { return tos; } + + /** Pushes an address onto the RAS. */ + void push(const PCStateBase &return_addr); + + /** Pops the top address from the RAS. */ + void pop(); + + /** Changes index to the top of the RAS, and replaces the top address + * with a new target. + * @param top_of_stack the index saved at the time of the prediction. + * @param restored The new target address of the new top of the RAS. + */ + void restore(unsigned top_of_stack, const PCStateBase *restored); + + bool empty() { return usedEntries == 0; } + + bool full() { return usedEntries >= numEntries; } + + /** Returns the top n entries of the stack as string. For debugging. */ + std::string toString(int n); + + /** Increments the top of stack index. 
*/ + inline void + incrTos() + { + if (++tos == numEntries) + tos = 0; + } + + /** Decrements the top of stack index. */ + inline void + decrTos() + { + tos = (tos == 0 ? numEntries - 1 : tos - 1); + } + + /** The Stack itself. */ + std::vector> addrStack; + + /** The number of entries in the RAS. */ + unsigned numEntries; + + /** The number of used entries in the RAS. */ + unsigned usedEntries; + + /** The top of stack index. */ + unsigned tos; + + protected: + ReturnAddrStack &parent; + }; + + + + public: + // typedef RASParams Params; + typedef ReturnAddrStackParams Params; + + // ReturnAddrStack(BPredUnit &_parent, const RASParams); + ReturnAddrStack(const Params &p); void reset(); - /** Returns the top address on the RAS. */ - const PCStateBase *top() { return addrStack[tos].get(); } - - /** Returns the index of the top of the RAS. */ - unsigned topIdx() { return tos; } - - /** Pushes an address onto the RAS. */ - void push(const PCStateBase &return_addr); - - /** Pops the top address from the RAS. */ - void pop(); - - /** Changes index to the top of the RAS, and replaces the top address with - * a new target. - * @param top_entry_idx The index of the RAS that will now be the top. - * @param restored The new target address of the new top of the RAS. + /** + * Pushes an address onto the RAS. + * @param pc The return address to push onto the stack. + * @param ras_history Pointer that will be set to an object that + * has the return address state associated when the address was pushed. */ - void restore(unsigned top_entry_idx, const PCStateBase *restored); + void push(ThreadID tid, const PCStateBase &pc, void * &ras_history); - bool empty() { return usedEntries == 0; } + /** + * Pops the top address from the RAS. + * @param ras_history Pointer that will be set to an object that + * has the return address state associated when an address was popped. + * @return The address that got popped from the stack.
+ * */ + const PCStateBase* pop(ThreadID tid, void * &ras_history); + + /** + * The branch (call/return) got squashed. + * Restores the state of the RAS and deletes the history. + * @param ras_history The pointer to the history object. + */ + void squash(ThreadID tid, void * &ras_history); + + /** + * A branch finally got committed. + * @param misp Whether the branch was mispredicted. + * @param brType The type of the branch. + * @param ras_history The pointer to the history object. + */ + void commit(ThreadID tid, bool misp, + const BranchType brType, void * &ras_history); - bool full() { return usedEntries == numEntries; } private: - /** Increments the top of stack index. */ - inline void - incrTos() - { - if (++tos == numEntries) - tos = 0; - } - /** Decrements the top of stack index. */ - inline void - decrTos() + class RASHistory { - tos = (tos == 0 ? numEntries - 1 : tos - 1); - } + public: + /* Was the RAS pushed or popped for this branch. */ + bool pushed = false; + bool poped = false; + /* Was it a return or a call */ + bool wasReturn = false; + bool wasCall = false; + /** The entry that was popped from the RAS (only valid if a return). */ + std::unique_ptr ras_entry; + /** The RAS index (top of stack pointer) of the instruction */ + unsigned tos = 0; + }; + + void makeRASHistory(void* &ras_history); /** The RAS itself. */ - std::vector> addrStack; + std::vector addrStacks; /** The number of entries in the RAS. */ unsigned numEntries; + /** The number of threads */ + unsigned numThreads; - /** The number of used entries in the RAS. */ - unsigned usedEntries; - - /** The top of stack index.
*/ - unsigned tos; + struct ReturnAddrStackStats : public statistics::Group + { + ReturnAddrStackStats(statistics::Group *parent); + statistics::Scalar pushes; + statistics::Scalar pops; + statistics::Scalar squashes; + statistics::Scalar used; + statistics::Scalar correct; + statistics::Scalar incorrect; + } stats; }; } // namespace branch_prediction diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py index ce265449c9..22ec29e59d 100644 --- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py +++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py @@ -96,7 +96,7 @@ class U74FUPool(MinorFUPool): class U74BP(TournamentBP): btb = SimpleBTB(numEntries=32) - RASSize = 12 + ras = ReturnAddrStack(numEntries=12) localHistoryTableSize = 4096 # is 3.6 KiB but gem5 requires power of 2 localPredictorSize = 16384 globalPredictorSize = 16384