cpu: Fixed the indirect branch predictor GHR handling
The internal indirect predictor global history was not being updated properly, resulting in higher than expected miss rates Also added a parameter to set the size of the indirect predictor GHR Change-Id: Ibc797816974cba6719da65122801e8919559a003 Signed-off-by: Pau Cabre <pau.cabre@metempsy.com> Reported-by: Daniel Carvalho <odanrc@yahoo.com.br> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/16928 Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Reviewed-by: Andrea Mondelli <Andrea.Mondelli@ucf.edu> Reviewed-by: Sudhanshu Jha <sudhanshu.jha@arm.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
This commit is contained in:
@@ -51,6 +51,7 @@ class BranchPredictor(SimObject):
|
||||
indirectTagSize = Param.Unsigned(16, "Indirect target cache tag bits")
|
||||
indirectPathLength = Param.Unsigned(3,
|
||||
"Previous indirect targets to use for path history")
|
||||
indirectGHRBits = Param.Unsigned(13, "Indirect GHR number of bits")
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -70,7 +70,8 @@ BPredUnit::BPredUnit(const Params *params)
|
||||
params->indirectTagSize,
|
||||
params->indirectPathLength,
|
||||
params->instShiftAmt,
|
||||
params->numThreads),
|
||||
params->numThreads,
|
||||
params->indirectGHRBits),
|
||||
instShiftAmt(params->instShiftAmt)
|
||||
{
|
||||
for (auto& r : RAS)
|
||||
@@ -207,8 +208,9 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
|
||||
" predicted %i for PC %s\n", tid, seqNum, pred_taken, pc);
|
||||
}
|
||||
|
||||
const bool orig_pred_taken = pred_taken;
|
||||
if (useIndirect) {
|
||||
iPred.updateDirectionInfo(tid, pred_taken, indirect_history);
|
||||
iPred.genIndirectInfo(tid, indirect_history);
|
||||
}
|
||||
|
||||
DPRINTF(Branch, "[tid:%i]: [sn:%i] Creating prediction history "
|
||||
@@ -317,6 +319,15 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
|
||||
|
||||
pc = target;
|
||||
|
||||
if (useIndirect) {
|
||||
// Update the indirect predictor with the direction prediction
|
||||
// Note that this happens after indirect lookup, so it does not use
|
||||
// the new information
|
||||
// Note also that we use orig_pred_taken instead of pred_taken in
|
||||
// as this is the actual outcome of the direction prediction
|
||||
iPred.updateDirectionInfo(tid, orig_pred_taken);
|
||||
}
|
||||
|
||||
predHist[tid].push_front(predict_record);
|
||||
|
||||
DPRINTF(Branch, "[tid:%i]: [sn:%i]: History entry added."
|
||||
@@ -340,7 +351,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid)
|
||||
predHist[tid].back().inst,
|
||||
predHist[tid].back().target);
|
||||
|
||||
iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory);
|
||||
if (useIndirect) {
|
||||
iPred.commit(done_sn, tid, predHist[tid].back().indirectHistory);
|
||||
}
|
||||
|
||||
predHist[tid].pop_back();
|
||||
}
|
||||
@@ -351,7 +364,10 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
|
||||
{
|
||||
History &pred_hist = predHist[tid];
|
||||
|
||||
iPred.squash(squashed_sn, tid);
|
||||
if (useIndirect) {
|
||||
iPred.squash(squashed_sn, tid);
|
||||
}
|
||||
|
||||
while (!pred_hist.empty() &&
|
||||
pred_hist.front().seqNum > squashed_sn) {
|
||||
if (pred_hist.front().usedRAS) {
|
||||
@@ -372,7 +388,7 @@ BPredUnit::squash(const InstSeqNum &squashed_sn, ThreadID tid)
|
||||
// This call should delete the bpHistory.
|
||||
squash(tid, pred_hist.front().bpHistory);
|
||||
if (useIndirect) {
|
||||
iPred.deleteDirectionInfo(tid, pred_hist.front().indirectHistory);
|
||||
iPred.deleteIndirectInfo(tid, pred_hist.front().indirectHistory);
|
||||
}
|
||||
|
||||
DPRINTF(Branch, "[tid:%i]: Removing history for [sn:%i] "
|
||||
@@ -469,7 +485,9 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
|
||||
}
|
||||
if (hist_it->wasIndirect) {
|
||||
++indirectMispredicted;
|
||||
iPred.recordTarget(hist_it->seqNum, corrTarget, tid);
|
||||
iPred.recordTarget(
|
||||
hist_it->seqNum, pred_hist.front().indirectHistory,
|
||||
corrTarget, tid);
|
||||
} else {
|
||||
DPRINTF(Branch,"[tid: %i] BTB Update called for [sn:%i]"
|
||||
" PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
|
||||
|
||||
@@ -36,10 +36,11 @@
|
||||
IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
|
||||
unsigned num_sets, unsigned num_ways,
|
||||
unsigned tag_bits, unsigned path_len, unsigned inst_shift,
|
||||
unsigned num_threads)
|
||||
unsigned num_threads, unsigned ghr_size)
|
||||
: hashGHR(hash_ghr), hashTargets(hash_targets),
|
||||
numSets(num_sets), numWays(num_ways), tagBits(tag_bits),
|
||||
pathLength(path_len), instShift(inst_shift)
|
||||
pathLength(path_len), instShift(inst_shift),
|
||||
ghrNumBits(ghr_size), ghrMask((1 << ghr_size)-1)
|
||||
{
|
||||
if (!isPowerOf2(numSets)) {
|
||||
panic("Indirect predictor requires power of 2 number of sets");
|
||||
@@ -51,18 +52,26 @@ IndirectPredictor::IndirectPredictor(bool hash_ghr, bool hash_targets,
|
||||
for (unsigned i = 0; i < numSets; i++) {
|
||||
targetCache[i].resize(numWays);
|
||||
}
|
||||
|
||||
fatal_if(ghrNumBits > (sizeof(ThreadInfo::ghr)*8), "ghr_size is too big");
|
||||
}
|
||||
|
||||
void
|
||||
IndirectPredictor::updateDirectionInfo(ThreadID tid, bool taken,
|
||||
void* & indirect_history)
|
||||
IndirectPredictor::genIndirectInfo(ThreadID tid, void* & indirect_history)
|
||||
{
|
||||
// record the GHR as it was before this prediction
|
||||
// It will be used to recover the history in case this prediction is
|
||||
// wrong or belongs to bad path
|
||||
indirect_history = new unsigned(threadInfo[tid].ghr);
|
||||
}
|
||||
|
||||
threadInfo[tid].ghr <<= taken;
|
||||
void
|
||||
IndirectPredictor::updateDirectionInfo(
|
||||
ThreadID tid, bool actually_taken)
|
||||
{
|
||||
threadInfo[tid].ghr <<= 1;
|
||||
threadInfo[tid].ghr |= actually_taken;
|
||||
threadInfo[tid].ghr &= ghrMask;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -71,6 +80,7 @@ IndirectPredictor::changeDirectionPrediction(ThreadID tid,
|
||||
{
|
||||
unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
|
||||
threadInfo[tid].ghr = ((*previousGhr) << 1) + actually_taken;
|
||||
threadInfo[tid].ghr &= ghrMask;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -147,7 +157,7 @@ IndirectPredictor::squash(InstSeqNum seq_num, ThreadID tid)
|
||||
}
|
||||
|
||||
void
|
||||
IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history)
|
||||
IndirectPredictor::deleteIndirectInfo(ThreadID tid, void * indirect_history)
|
||||
{
|
||||
unsigned * previousGhr = static_cast<unsigned *>(indirect_history);
|
||||
threadInfo[tid].ghr = *previousGhr;
|
||||
@@ -156,16 +166,19 @@ IndirectPredictor::deleteDirectionInfo(ThreadID tid, void * indirect_history)
|
||||
}
|
||||
|
||||
void
|
||||
IndirectPredictor::recordTarget(InstSeqNum seq_num,
|
||||
const TheISA::PCState& target, ThreadID tid)
|
||||
IndirectPredictor::recordTarget(
|
||||
InstSeqNum seq_num, void * indirect_history, const TheISA::PCState& target,
|
||||
ThreadID tid)
|
||||
{
|
||||
ThreadInfo &t_info = threadInfo[tid];
|
||||
|
||||
unsigned * ghr = static_cast<unsigned *>(indirect_history);
|
||||
|
||||
// Should have just squashed so this branch should be the oldest
|
||||
auto hist_entry = *(t_info.pathHist.rbegin());
|
||||
// Temporarily pop it off the history so we can calculate the set
|
||||
t_info.pathHist.pop_back();
|
||||
Addr set_index = getSetIndex(hist_entry.pcAddr, t_info.ghr, tid);
|
||||
Addr set_index = getSetIndex(hist_entry.pcAddr, *ghr, tid);
|
||||
Addr tag = getTag(hist_entry.pcAddr);
|
||||
hist_entry.targetAddr = target.instAddr();
|
||||
t_info.pathHist.push_back(hist_entry);
|
||||
|
||||
@@ -43,19 +43,20 @@ class IndirectPredictor
|
||||
IndirectPredictor(bool hash_ghr, bool hash_targets,
|
||||
unsigned num_sets, unsigned num_ways,
|
||||
unsigned tag_bits, unsigned path_len,
|
||||
unsigned inst_shift, unsigned num_threads);
|
||||
unsigned inst_shift, unsigned num_threads,
|
||||
unsigned ghr_size);
|
||||
bool lookup(Addr br_addr, TheISA::PCState& br_target, ThreadID tid);
|
||||
void recordIndirect(Addr br_addr, Addr tgt_addr, InstSeqNum seq_num,
|
||||
ThreadID tid);
|
||||
void commit(InstSeqNum seq_num, ThreadID tid, void * indirect_history);
|
||||
void squash(InstSeqNum seq_num, ThreadID tid);
|
||||
void recordTarget(InstSeqNum seq_num, const TheISA::PCState& target,
|
||||
ThreadID tid);
|
||||
void updateDirectionInfo(ThreadID tid, bool taken,
|
||||
void* & indirect_history);
|
||||
void recordTarget(InstSeqNum seq_num, void * indirect_history,
|
||||
const TheISA::PCState& target, ThreadID tid);
|
||||
void genIndirectInfo(ThreadID tid, void* & indirect_history);
|
||||
void updateDirectionInfo(ThreadID tid, bool actually_taken);
|
||||
void deleteIndirectInfo(ThreadID tid, void * indirect_history);
|
||||
void changeDirectionPrediction(ThreadID tid, void * indirect_history,
|
||||
bool actually_taken);
|
||||
void deleteDirectionInfo(ThreadID tid, void * indirect_history);
|
||||
|
||||
private:
|
||||
const bool hashGHR;
|
||||
@@ -65,6 +66,8 @@ class IndirectPredictor
|
||||
const unsigned tagBits;
|
||||
const unsigned pathLength;
|
||||
const unsigned instShift;
|
||||
const unsigned ghrNumBits;
|
||||
const unsigned ghrMask;
|
||||
|
||||
struct IPredEntry
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user