cpu: Made TAGE a SimObject that can be used by other predictors
The TAGE implementation is now a SimObject so that other branch predictors can easily use it. It has also been updated with the latest available TAGE implementation from Andre Seznec: http://www.irisa.fr/alf/downloads/seznec/TAGE-GSC-IMLI.tar Change-Id: I2251b8b2d7f94124f9955f52b917dc3b064f090e Reviewed-on: https://gem5-review.googlesource.com/c/15317 Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
This commit is contained in:
@@ -119,7 +119,7 @@ LocalBP::lookup(ThreadID tid, Addr branch_addr, void * &bp_history)
|
||||
|
||||
void
|
||||
LocalBP::update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed)
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget)
|
||||
{
|
||||
assert(bp_history == NULL);
|
||||
unsigned local_predictor_idx;
|
||||
|
||||
@@ -92,7 +92,7 @@ class LocalBP : public BPredUnit
|
||||
* @param taken Whether or not the branch was taken.
|
||||
*/
|
||||
void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed);
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget);
|
||||
|
||||
void squash(ThreadID tid, void *bp_history)
|
||||
{ assert(bp_history == NULL); }
|
||||
|
||||
@@ -87,12 +87,14 @@ class BiModeBP(BranchPredictor):
|
||||
choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
|
||||
choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")
|
||||
|
||||
# TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf
|
||||
# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits)
|
||||
class TAGE(BranchPredictor):
|
||||
type = 'TAGE'
|
||||
cxx_class = 'TAGE'
|
||||
cxx_header = "cpu/pred/tage.hh"
|
||||
class TAGEBase(SimObject):
|
||||
type = 'TAGEBase'
|
||||
cxx_class = 'TAGEBase'
|
||||
cxx_header = "cpu/pred/tage_base.hh"
|
||||
|
||||
numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
|
||||
instShiftAmt = Param.Unsigned(Parent.instShiftAmt,
|
||||
"Number of bits to shift instructions by")
|
||||
|
||||
nHistoryTables = Param.Unsigned(7, "Number of history tables")
|
||||
minHist = Param.Unsigned(5, "Minimum history size of TAGE")
|
||||
@@ -115,8 +117,33 @@ class TAGE(BranchPredictor):
|
||||
pathHistBits = Param.Unsigned(16, "Path history size")
|
||||
logUResetPeriod = Param.Unsigned(18,
|
||||
"Log period in number of branches to reset TAGE useful counters")
|
||||
numUseAltOnNa = Param.Unsigned(1, "Number of USE_ALT_ON_NA counters")
|
||||
useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
|
||||
|
||||
maxNumAlloc = Param.Unsigned(1,
|
||||
"Max number of TAGE entries allocted on mispredict")
|
||||
|
||||
# List of enabled TAGE tables. If empty, all are enabled
|
||||
noSkip = VectorParam.Bool([], "Vector of enabled TAGE tables")
|
||||
|
||||
speculativeHistUpdate = Param.Bool(True,
|
||||
"Use speculative update for histories")
|
||||
|
||||
# TAGE branch predictor as described in https://www.jilp.org/vol8/v8paper1.pdf
|
||||
# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits)
|
||||
class TAGE(BranchPredictor):
|
||||
type = 'TAGE'
|
||||
cxx_class = 'TAGE'
|
||||
cxx_header = "cpu/pred/tage.hh"
|
||||
tage = Param.TAGEBase(TAGEBase(), "Tage object")
|
||||
|
||||
class LTAGE_TAGE(TAGEBase):
|
||||
nHistoryTables = 12
|
||||
minHist = 4
|
||||
maxHist = 640
|
||||
tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15]
|
||||
logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9]
|
||||
logUResetPeriod = 19
|
||||
|
||||
# LTAGE branch predictor as described in
|
||||
# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf
|
||||
@@ -127,12 +154,7 @@ class LTAGE(TAGE):
|
||||
cxx_class = 'LTAGE'
|
||||
cxx_header = "cpu/pred/ltage.hh"
|
||||
|
||||
nHistoryTables = 12
|
||||
minHist = 4
|
||||
maxHist = 640
|
||||
tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15]
|
||||
logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9]
|
||||
logUResetPeriod = 19
|
||||
tage = LTAGE_TAGE()
|
||||
|
||||
logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
|
||||
withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
|
||||
|
||||
@@ -43,6 +43,7 @@ Source('indirect.cc')
|
||||
Source('ras.cc')
|
||||
Source('tournament.cc')
|
||||
Source ('bi_mode.cc')
|
||||
Source('tage_base.cc')
|
||||
Source('tage.cc')
|
||||
Source('ltage.cc')
|
||||
DebugFlag('FreeList')
|
||||
|
||||
@@ -161,7 +161,7 @@ BiModeBP::btbUpdate(ThreadID tid, Addr branchAddr, void * &bpHistory)
|
||||
*/
|
||||
void
|
||||
BiModeBP::update(ThreadID tid, Addr branchAddr, bool taken, void *bpHistory,
|
||||
bool squashed)
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget)
|
||||
{
|
||||
assert(bpHistory);
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ class BiModeBP : public BPredUnit
|
||||
bool lookup(ThreadID tid, Addr branch_addr, void * &bp_history);
|
||||
void btbUpdate(ThreadID tid, Addr branch_addr, void * &bp_history);
|
||||
void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed);
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget);
|
||||
unsigned getGHR(ThreadID tid, void *bp_history) const;
|
||||
|
||||
private:
|
||||
|
||||
@@ -210,7 +210,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
|
||||
"for PC %s\n", tid, seqNum, pc);
|
||||
|
||||
PredictorHistory predict_record(seqNum, pc.instAddr(),
|
||||
pred_taken, bp_history, tid);
|
||||
pred_taken, bp_history, tid, inst);
|
||||
|
||||
// Now lookup in the BTB or RAS.
|
||||
if (pred_taken) {
|
||||
@@ -309,6 +309,7 @@ BPredUnit::predict(const StaticInstPtr &inst, const InstSeqNum &seqNum,
|
||||
}
|
||||
TheISA::advancePC(target, inst);
|
||||
}
|
||||
predict_record.target = target.instAddr();
|
||||
|
||||
pc = target;
|
||||
|
||||
@@ -332,7 +333,9 @@ BPredUnit::update(const InstSeqNum &done_sn, ThreadID tid)
|
||||
// Update the branch predictor with the correct results.
|
||||
update(tid, predHist[tid].back().pc,
|
||||
predHist[tid].back().predTaken,
|
||||
predHist[tid].back().bpHistory, false);
|
||||
predHist[tid].back().bpHistory, false,
|
||||
predHist[tid].back().inst,
|
||||
predHist[tid].back().target);
|
||||
|
||||
predHist[tid].pop_back();
|
||||
}
|
||||
@@ -440,9 +443,11 @@ BPredUnit::squash(const InstSeqNum &squashed_sn,
|
||||
|
||||
// Remember the correct direction for the update at commit.
|
||||
pred_hist.front().predTaken = actually_taken;
|
||||
pred_hist.front().target = corrTarget.instAddr();
|
||||
|
||||
update(tid, (*hist_it).pc, actually_taken,
|
||||
pred_hist.front().bpHistory, true);
|
||||
pred_hist.front().bpHistory, true, pred_hist.front().inst,
|
||||
corrTarget.instAddr());
|
||||
|
||||
if (actually_taken) {
|
||||
if (hist_it->wasReturn && !hist_it->usedRAS) {
|
||||
|
||||
@@ -175,10 +175,15 @@ class BPredUnit : public SimObject
|
||||
* associated with the branch lookup that is being updated.
|
||||
* @param squashed Set to true when this function is called during a
|
||||
* squash operation.
|
||||
* @param inst Static instruction information
|
||||
* @param corrTarget The resolved target of the branch (only needed
|
||||
* for squashed branches)
|
||||
* @todo Make this update flexible enough to handle a global predictor.
|
||||
*/
|
||||
virtual void update(ThreadID tid, Addr instPC, bool taken,
|
||||
void *bp_history, bool squashed) = 0;
|
||||
void *bp_history, bool squashed,
|
||||
const StaticInstPtr & inst = StaticInst::nullStaticInstPtr,
|
||||
Addr corrTarget = MaxAddr) = 0;
|
||||
/**
|
||||
* Updates the BTB with the target of a branch.
|
||||
* @param inst_PC The branch's PC that will be updated.
|
||||
@@ -200,10 +205,11 @@ class BPredUnit : public SimObject
|
||||
*/
|
||||
PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
|
||||
bool pred_taken, void *bp_history,
|
||||
ThreadID _tid)
|
||||
ThreadID _tid, const StaticInstPtr & inst)
|
||||
: seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
|
||||
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0), pushedRAS(0),
|
||||
wasCall(0), wasReturn(0), wasIndirect(0)
|
||||
wasCall(0), wasReturn(0), wasIndirect(0),
|
||||
target(MaxAddr), inst(inst)
|
||||
{}
|
||||
|
||||
bool operator==(const PredictorHistory &entry) const {
|
||||
@@ -248,6 +254,14 @@ class BPredUnit : public SimObject
|
||||
|
||||
/** Wether this instruction was an indirect branch */
|
||||
bool wasIndirect;
|
||||
|
||||
/** Target of the branch. First it is predicted, and fixed later
|
||||
* if necessary
|
||||
*/
|
||||
Addr target;
|
||||
|
||||
/** The branch instrction */
|
||||
const StaticInstPtr inst;
|
||||
};
|
||||
|
||||
typedef std::deque<PredictorHistory> History;
|
||||
|
||||
@@ -168,9 +168,10 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
|
||||
ltable[idx].confidence = 0;
|
||||
ltable[idx].currentIter = 0;
|
||||
return;
|
||||
} else if (bi->loopPred != bi->tagePred) {
|
||||
} else if (bi->loopPred != bi->tageBranchInfo->tagePred) {
|
||||
DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
|
||||
unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
|
||||
TAGEBase::unsignedCtrUpdate(ltable[idx].age, true,
|
||||
loopTableAgeBits);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,7 +191,7 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
|
||||
if (ltable[idx].currentIter == ltable[idx].numIter) {
|
||||
DPRINTF(LTage, "Loop End predicted successfully:%lx\n", pc);
|
||||
|
||||
unsignedCtrUpdate(ltable[idx].confidence, true,
|
||||
TAGEBase::unsignedCtrUpdate(ltable[idx].confidence, true,
|
||||
loopTableConfidenceBits);
|
||||
//just do not predict when the loop count is 1 or 2
|
||||
if (ltable[idx].numIter < 3) {
|
||||
@@ -218,10 +219,11 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
|
||||
}
|
||||
|
||||
} else if (useDirectionBit ?
|
||||
((bi->loopPredValid ? bi->loopPred : bi->tagePred) != taken) :
|
||||
((bi->loopPredValid ?
|
||||
bi->loopPred : bi->tageBranchInfo->tagePred) != taken) :
|
||||
taken) {
|
||||
//try to allocate an entry on taken branch
|
||||
int nrand = random_mt.random<int>();
|
||||
int nrand = TAGEBase::getRandom();
|
||||
for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
|
||||
int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
|
||||
idx = bi->loopIndex + loop_hit;
|
||||
@@ -248,10 +250,11 @@ LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
|
||||
bool
|
||||
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
|
||||
{
|
||||
LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
|
||||
LTageBranchInfo *bi = new LTageBranchInfo(*tage);
|
||||
b = (void*)(bi);
|
||||
|
||||
bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);
|
||||
bool pred_taken = tage->tagePredict(tid, branch_pc, cond_branch,
|
||||
bi->tageBranchInfo);
|
||||
|
||||
if (cond_branch) {
|
||||
// loop prediction
|
||||
@@ -259,12 +262,13 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
|
||||
|
||||
if ((loopUseCounter >= 0) && bi->loopPredValid) {
|
||||
pred_taken = bi->loopPred;
|
||||
bi->provider = LOOP;
|
||||
bi->tageBranchInfo->provider = LOOP;
|
||||
}
|
||||
DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
|
||||
"loopValid?:%d, loopUseCounter:%d, tagePred:%d, altPred:%d\n",
|
||||
branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
|
||||
loopUseCounter, bi->tagePred, bi->altTaken);
|
||||
loopUseCounter, bi->tageBranchInfo->tagePred,
|
||||
bi->tageBranchInfo->altTaken);
|
||||
|
||||
if (useSpeculation) {
|
||||
specLoopUpdate(pred_taken, bi);
|
||||
@@ -275,40 +279,68 @@ LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
|
||||
}
|
||||
|
||||
void
|
||||
LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
|
||||
TageBranchInfo* tage_bi, int nrand)
|
||||
LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget)
|
||||
{
|
||||
LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);
|
||||
assert(bp_history);
|
||||
|
||||
if (useSpeculation) {
|
||||
// recalculate loop prediction without speculation
|
||||
// It is ok to overwrite the loop prediction fields in bi
|
||||
// as the stats have already been updated with the previous
|
||||
// values
|
||||
bi->loopPred = getLoop(branch_pc, bi, false);
|
||||
}
|
||||
LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(bp_history);
|
||||
|
||||
if (bi->loopPredValid) {
|
||||
if (bi->tagePred != bi->loopPred) {
|
||||
ctrUpdate(loopUseCounter,
|
||||
(bi->loopPred == taken),
|
||||
withLoopBits);
|
||||
if (squashed) {
|
||||
if (tage->isSpeculativeUpdateEnabled()) {
|
||||
// This restores the global history, then update it
|
||||
// and recomputes the folded histories.
|
||||
tage->squash(tid, taken, bi->tageBranchInfo, corrTarget);
|
||||
squashLoop(bi);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
loopUpdate(branch_pc, taken, bi);
|
||||
int nrand = TAGEBase::getRandom() & 3;
|
||||
if (bi->tageBranchInfo->condBranch) {
|
||||
DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
|
||||
branch_pc, taken);
|
||||
tage->updateStats(taken, bi->tageBranchInfo);
|
||||
// update stats
|
||||
if (bi->tageBranchInfo->provider == LOOP) {
|
||||
if (taken == bi->loopPred) {
|
||||
loopPredictorCorrect++;
|
||||
} else {
|
||||
loopPredictorWrong++;
|
||||
}
|
||||
}
|
||||
// cond Branch Update
|
||||
if (useSpeculation) {
|
||||
// recalculate loop prediction without speculation
|
||||
// It is ok to overwrite the loop prediction fields in bi
|
||||
// as the stats have already been updated with the previous
|
||||
// values
|
||||
bi->loopPred = getLoop(branch_pc, bi, false);
|
||||
}
|
||||
if (bi->loopPredValid) {
|
||||
if (bi->tageBranchInfo->tagePred != bi->loopPred) {
|
||||
TAGEBase::ctrUpdate(loopUseCounter,
|
||||
(bi->loopPred == taken),
|
||||
withLoopBits);
|
||||
}
|
||||
}
|
||||
|
||||
TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
|
||||
loopUpdate(branch_pc, taken, bi);
|
||||
|
||||
tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo,
|
||||
nrand, corrTarget);
|
||||
}
|
||||
|
||||
tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false,
|
||||
inst, corrTarget);
|
||||
|
||||
delete bi;
|
||||
}
|
||||
|
||||
void
|
||||
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
|
||||
LTAGE::squashLoop(LTageBranchInfo* bi)
|
||||
{
|
||||
TAGE::squash(tid, taken, bp_history);
|
||||
|
||||
LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
|
||||
|
||||
if (bi->condBranch) {
|
||||
if (bi->tageBranchInfo->condBranch) {
|
||||
if (bi->loopHit >= 0) {
|
||||
int idx = finallindex(bi->loopIndex,
|
||||
bi->loopLowPcBits,
|
||||
@@ -322,37 +354,14 @@ void
|
||||
LTAGE::squash(ThreadID tid, void *bp_history)
|
||||
{
|
||||
LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
|
||||
if (bi->condBranch) {
|
||||
if (bi->loopHit >= 0) {
|
||||
int idx = finallindex(bi->loopIndex,
|
||||
bi->loopLowPcBits,
|
||||
bi->loopHit);
|
||||
ltable[idx].currentIterSpec = bi->currentIter;
|
||||
}
|
||||
|
||||
if (bi->tageBranchInfo->condBranch) {
|
||||
squashLoop(bi);
|
||||
}
|
||||
|
||||
TAGE::squash(tid, bp_history);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
LTAGE::updateStats(bool taken, TageBranchInfo* bi)
|
||||
{
|
||||
TAGE::updateStats(taken, bi);
|
||||
|
||||
LTageBranchInfo * ltage_bi = static_cast<LTageBranchInfo *>(bi);
|
||||
|
||||
if (ltage_bi->provider == LOOP) {
|
||||
if (taken == ltage_bi->loopPred) {
|
||||
loopPredictorCorrect++;
|
||||
} else {
|
||||
loopPredictorWrong++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
LTAGE::regStats()
|
||||
{
|
||||
|
||||
@@ -66,6 +66,9 @@ class LTAGE: public TAGE
|
||||
|
||||
// Base class methods.
|
||||
void squash(ThreadID tid, void *bp_history) override;
|
||||
void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed, const StaticInstPtr & inst,
|
||||
Addr corrTarget) override;
|
||||
|
||||
void regStats() override;
|
||||
|
||||
@@ -88,7 +91,7 @@ class LTAGE: public TAGE
|
||||
|
||||
// more provider types
|
||||
enum {
|
||||
LOOP = LAST_TAGE_PROVIDER_TYPE + 1
|
||||
LOOP = TAGEBase::LAST_TAGE_PROVIDER_TYPE + 1
|
||||
};
|
||||
|
||||
// Primary branch history entry
|
||||
@@ -103,12 +106,15 @@ class LTAGE: public TAGE
|
||||
int loopLowPcBits; // only for useHashing
|
||||
int loopHit;
|
||||
|
||||
LTageBranchInfo(int sz)
|
||||
: TageBranchInfo(sz),
|
||||
LTageBranchInfo(TAGEBase &tage)
|
||||
: TageBranchInfo(tage),
|
||||
loopTag(0), currentIter(0),
|
||||
loopPred(false),
|
||||
loopPredValid(false), loopIndex(0), loopLowPcBits(0), loopHit(0)
|
||||
{}
|
||||
|
||||
virtual ~LTageBranchInfo()
|
||||
{}
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -156,17 +162,6 @@ class LTAGE: public TAGE
|
||||
*/
|
||||
void specLoopUpdate(bool taken, LTageBranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Update LTAGE for conditional branches.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
* @nrand Random int number from 0 to 3
|
||||
*/
|
||||
void condBranchUpdate(
|
||||
Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand) override;
|
||||
|
||||
/**
|
||||
* Get a branch prediction from LTAGE. *NOT* an override of
|
||||
* BpredUnit::predict().
|
||||
@@ -184,24 +179,9 @@ class LTAGE: public TAGE
|
||||
* Restores speculatively updated path and direction histories.
|
||||
* Also recomputes compressed (folded) histories based on the
|
||||
* correct branch outcome.
|
||||
* This version of squash() is called once on a branch misprediction.
|
||||
* @param tid The Thread ID to select the histories to rollback.
|
||||
* @param taken The correct branch outcome.
|
||||
* @param bp_history Wrapping pointer to TageBranchInfo (to allow
|
||||
* storing derived class prediction information in the
|
||||
* base class).
|
||||
* @post bp_history points to valid memory.
|
||||
* @param bi Branch information.
|
||||
*/
|
||||
void squash(
|
||||
ThreadID tid, bool taken, void *bp_history) override;
|
||||
|
||||
/**
|
||||
* Update the stats
|
||||
* @param taken Actual branch outcome
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
void updateStats(bool taken, TageBranchInfo* bi) override;
|
||||
void squashLoop(LTageBranchInfo * bi);
|
||||
|
||||
const unsigned logSizeLoopPred;
|
||||
const unsigned loopTableAgeBits;
|
||||
|
||||
@@ -47,222 +47,8 @@
|
||||
#include "debug/Fetch.hh"
|
||||
#include "debug/Tage.hh"
|
||||
|
||||
TAGE::TAGE(const TAGEParams *params)
|
||||
: BPredUnit(params),
|
||||
logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
|
||||
nHistoryTables(params->nHistoryTables),
|
||||
tagTableCounterBits(params->tagTableCounterBits),
|
||||
tagTableUBits(params->tagTableUBits),
|
||||
histBufferSize(params->histBufferSize),
|
||||
minHist(params->minHist),
|
||||
maxHist(params->maxHist),
|
||||
pathHistBits(params->pathHistBits),
|
||||
tagTableTagWidths(params->tagTableTagWidths),
|
||||
logTagTableSizes(params->logTagTableSizes),
|
||||
threadHistory(params->numThreads),
|
||||
logUResetPeriod(params->logUResetPeriod),
|
||||
useAltOnNaBits(params->useAltOnNaBits)
|
||||
TAGE::TAGE(const TAGEParams *params) : BPredUnit(params), tage(params->tage)
|
||||
{
|
||||
// Current method for periodically resetting the u counter bits only
|
||||
// works for 1 or 2 bits
|
||||
// Also make sure that it is not 0
|
||||
assert(tagTableUBits <= 2 && (tagTableUBits > 0));
|
||||
|
||||
// we use int type for the path history, so it cannot be more than
|
||||
// its size
|
||||
assert(pathHistBits <= (sizeof(int)*8));
|
||||
|
||||
// initialize the counter to half of the period
|
||||
assert(logUResetPeriod != 0);
|
||||
tCounter = ULL(1) << (logUResetPeriod - 1);
|
||||
|
||||
assert(params->histBufferSize > params->maxHist * 2);
|
||||
useAltPredForNewlyAllocated = 0;
|
||||
|
||||
for (auto& history : threadHistory) {
|
||||
history.pathHist = 0;
|
||||
history.globalHistory = new uint8_t[histBufferSize];
|
||||
history.gHist = history.globalHistory;
|
||||
memset(history.gHist, 0, histBufferSize);
|
||||
history.ptGhist = 0;
|
||||
}
|
||||
|
||||
histLengths = new int [nHistoryTables+1];
|
||||
histLengths[1] = minHist;
|
||||
histLengths[nHistoryTables] = maxHist;
|
||||
|
||||
for (int i = 2; i <= nHistoryTables; i++) {
|
||||
histLengths[i] = (int) (((double) minHist *
|
||||
pow ((double) (maxHist) / (double) minHist,
|
||||
(double) (i - 1) / (double) ((nHistoryTables- 1))))
|
||||
+ 0.5);
|
||||
}
|
||||
|
||||
assert(tagTableTagWidths.size() == (nHistoryTables+1));
|
||||
assert(logTagTableSizes.size() == (nHistoryTables+1));
|
||||
|
||||
// First entry is for the Bimodal table and it is untagged in this
|
||||
// implementation
|
||||
assert(tagTableTagWidths[0] == 0);
|
||||
|
||||
for (auto& history : threadHistory) {
|
||||
history.computeIndices = new FoldedHistory[nHistoryTables+1];
|
||||
history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
|
||||
history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
|
||||
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
history.computeIndices[i].init(
|
||||
histLengths[i], (logTagTableSizes[i]));
|
||||
history.computeTags[0][i].init(
|
||||
history.computeIndices[i].origLength, tagTableTagWidths[i]);
|
||||
history.computeTags[1][i].init(
|
||||
history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
|
||||
DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
|
||||
histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
|
||||
btablePrediction.resize(bimodalTableSize, false);
|
||||
btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
|
||||
true);
|
||||
|
||||
gtable = new TageEntry*[nHistoryTables + 1];
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
|
||||
}
|
||||
|
||||
tableIndices = new int [nHistoryTables+1];
|
||||
tableTags = new int [nHistoryTables+1];
|
||||
}
|
||||
|
||||
int
|
||||
TAGE::bindex(Addr pc_in) const
|
||||
{
|
||||
return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
|
||||
}
|
||||
|
||||
int
|
||||
TAGE::F(int A, int size, int bank) const
|
||||
{
|
||||
int A1, A2;
|
||||
|
||||
A = A & ((ULL(1) << size) - 1);
|
||||
A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
|
||||
A2 = (A >> logTagTableSizes[bank]);
|
||||
A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
|
||||
+ (A2 >> (logTagTableSizes[bank] - bank));
|
||||
A = A1 ^ A2;
|
||||
A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
|
||||
+ (A >> (logTagTableSizes[bank] - bank));
|
||||
return (A);
|
||||
}
|
||||
|
||||
|
||||
// gindex computes a full hash of pc, ghist and pathHist
|
||||
int
|
||||
TAGE::gindex(ThreadID tid, Addr pc, int bank) const
|
||||
{
|
||||
int index;
|
||||
int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
|
||||
histLengths[bank];
|
||||
const Addr shiftedPc = pc >> instShiftAmt;
|
||||
index =
|
||||
shiftedPc ^
|
||||
(shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
|
||||
threadHistory[tid].computeIndices[bank].comp ^
|
||||
F(threadHistory[tid].pathHist, hlen, bank);
|
||||
|
||||
return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
|
||||
}
|
||||
|
||||
|
||||
// Tag computation
|
||||
uint16_t
|
||||
TAGE::gtag(ThreadID tid, Addr pc, int bank) const
|
||||
{
|
||||
int tag = (pc >> instShiftAmt) ^
|
||||
threadHistory[tid].computeTags[0][bank].comp ^
|
||||
(threadHistory[tid].computeTags[1][bank].comp << 1);
|
||||
|
||||
return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
|
||||
}
|
||||
|
||||
|
||||
// Up-down saturating counter
|
||||
void
|
||||
TAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
|
||||
{
|
||||
assert(nbits <= sizeof(int8_t) << 3);
|
||||
if (taken) {
|
||||
if (ctr < ((1 << (nbits - 1)) - 1))
|
||||
ctr++;
|
||||
} else {
|
||||
if (ctr > -(1 << (nbits - 1)))
|
||||
ctr--;
|
||||
}
|
||||
}
|
||||
|
||||
// Up-down unsigned saturating counter
|
||||
void
|
||||
TAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
|
||||
{
|
||||
assert(nbits <= sizeof(uint8_t) << 3);
|
||||
if (up) {
|
||||
if (ctr < ((1 << nbits) - 1))
|
||||
ctr++;
|
||||
} else {
|
||||
if (ctr)
|
||||
ctr--;
|
||||
}
|
||||
}
|
||||
|
||||
// Bimodal prediction
|
||||
bool
|
||||
TAGE::getBimodePred(Addr pc, TageBranchInfo* bi) const
|
||||
{
|
||||
return btablePrediction[bi->bimodalIndex];
|
||||
}
|
||||
|
||||
|
||||
// Update the bimodal predictor: a hysteresis bit is shared among N prediction
|
||||
// bits (N = 2 ^ logRatioBiModalHystEntries)
|
||||
void
|
||||
TAGE::baseUpdate(Addr pc, bool taken, TageBranchInfo* bi)
|
||||
{
|
||||
int inter = (btablePrediction[bi->bimodalIndex] << 1)
|
||||
+ btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
|
||||
if (taken) {
|
||||
if (inter < 3)
|
||||
inter++;
|
||||
} else if (inter > 0) {
|
||||
inter--;
|
||||
}
|
||||
const bool pred = inter >> 1;
|
||||
const bool hyst = inter & 1;
|
||||
btablePrediction[bi->bimodalIndex] = pred;
|
||||
btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
|
||||
DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
|
||||
}
|
||||
|
||||
// shifting the global history: we manage the history in a big table in order
|
||||
// to reduce simulation time
|
||||
void
|
||||
TAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
|
||||
{
|
||||
if (pt == 0) {
|
||||
DPRINTF(Tage, "Rolling over the histories\n");
|
||||
// Copy beginning of globalHistoryBuffer to end, such that
|
||||
// the last maxHist outcomes are still reachable
|
||||
// through pt[0 .. maxHist - 1].
|
||||
for (int i = 0; i < maxHist; i++)
|
||||
tab[histBufferSize - maxHist + i] = tab[i];
|
||||
pt = histBufferSize - maxHist;
|
||||
h = &tab[pt];
|
||||
}
|
||||
pt--;
|
||||
h--;
|
||||
h[0] = (dir) ? 1 : 0;
|
||||
}
|
||||
|
||||
// Get GHR for hashing indirect predictor
|
||||
@@ -272,308 +58,68 @@ unsigned
|
||||
TAGE::getGHR(ThreadID tid, void *bp_history) const
|
||||
{
|
||||
TageBranchInfo* bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
unsigned val = 0;
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
// Make sure we don't go out of bounds
|
||||
int gh_offset = bi->ptGhist + i;
|
||||
assert(&(threadHistory[tid].globalHistory[gh_offset]) <
|
||||
threadHistory[tid].globalHistory + histBufferSize);
|
||||
val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
//prediction
|
||||
bool
|
||||
TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
|
||||
{
|
||||
TageBranchInfo *bi = new TageBranchInfo(nHistoryTables+1);
|
||||
b = (void*)(bi);
|
||||
return tagePredict(tid, branch_pc, cond_branch, bi);
|
||||
}
|
||||
|
||||
bool
|
||||
TAGE::tagePredict(ThreadID tid, Addr branch_pc,
|
||||
bool cond_branch, TageBranchInfo* bi)
|
||||
{
|
||||
Addr pc = branch_pc;
|
||||
bool pred_taken = true;
|
||||
|
||||
if (cond_branch) {
|
||||
// TAGE prediction
|
||||
|
||||
// computes the table addresses and the partial tags
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tableIndices[i] = gindex(tid, pc, i);
|
||||
bi->tableIndices[i] = tableIndices[i];
|
||||
tableTags[i] = gtag(tid, pc, i);
|
||||
bi->tableTags[i] = tableTags[i];
|
||||
}
|
||||
|
||||
bi->bimodalIndex = bindex(pc);
|
||||
|
||||
bi->hitBank = 0;
|
||||
bi->altBank = 0;
|
||||
//Look for the bank with longest matching history
|
||||
for (int i = nHistoryTables; i > 0; i--) {
|
||||
if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
|
||||
bi->hitBank = i;
|
||||
bi->hitBankIndex = tableIndices[bi->hitBank];
|
||||
break;
|
||||
}
|
||||
}
|
||||
//Look for the alternate bank
|
||||
for (int i = bi->hitBank - 1; i > 0; i--) {
|
||||
if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
|
||||
bi->altBank = i;
|
||||
bi->altBankIndex = tableIndices[bi->altBank];
|
||||
break;
|
||||
}
|
||||
}
|
||||
//computes the prediction and the alternate prediction
|
||||
if (bi->hitBank > 0) {
|
||||
if (bi->altBank > 0) {
|
||||
bi->altTaken =
|
||||
gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
|
||||
}else {
|
||||
bi->altTaken = getBimodePred(pc, bi);
|
||||
}
|
||||
|
||||
bi->longestMatchPred =
|
||||
gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
|
||||
bi->pseudoNewAlloc =
|
||||
abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
|
||||
|
||||
//if the entry is recognized as a newly allocated entry and
|
||||
//useAltPredForNewlyAllocated is positive use the alternate
|
||||
//prediction
|
||||
if ((useAltPredForNewlyAllocated < 0) || ! bi->pseudoNewAlloc) {
|
||||
bi->tagePred = bi->longestMatchPred;
|
||||
bi->provider = TAGE_LONGEST_MATCH;
|
||||
} else {
|
||||
bi->tagePred = bi->altTaken;
|
||||
bi->provider = bi->altBank ? TAGE_ALT_MATCH
|
||||
: BIMODAL_ALT_MATCH;
|
||||
}
|
||||
} else {
|
||||
bi->altTaken = getBimodePred(pc, bi);
|
||||
bi->tagePred = bi->altTaken;
|
||||
bi->longestMatchPred = bi->altTaken;
|
||||
bi->provider = BIMODAL_ONLY;
|
||||
}
|
||||
//end TAGE prediction
|
||||
|
||||
pred_taken = (bi->tagePred);
|
||||
DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n",
|
||||
branch_pc, pred_taken, bi->tagePred, bi->altTaken);
|
||||
}
|
||||
bi->branchPC = branch_pc;
|
||||
bi->condBranch = cond_branch;
|
||||
return pred_taken;
|
||||
return tage->getGHR(tid, bi->tageBranchInfo);
|
||||
}
|
||||
|
||||
// PREDICTOR UPDATE
|
||||
void
|
||||
TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
|
||||
bool squashed)
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget)
|
||||
{
|
||||
assert(bp_history);
|
||||
|
||||
TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
|
||||
assert(corrTarget != MaxAddr);
|
||||
|
||||
if (squashed) {
|
||||
// This restores the global history, then update it
|
||||
// and recomputes the folded histories.
|
||||
squash(tid, taken, bp_history);
|
||||
tage->squash(tid, taken, bi->tageBranchInfo, corrTarget);
|
||||
return;
|
||||
}
|
||||
|
||||
int nrand = random_mt.random<int>(0,3);
|
||||
if (bi->condBranch) {
|
||||
int nrand = TAGEBase::getRandom() & 3;
|
||||
if (bi->tageBranchInfo->condBranch) {
|
||||
DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n",
|
||||
branch_pc, taken);
|
||||
updateStats(taken, bi);
|
||||
condBranchUpdate(branch_pc, taken, bi, nrand);
|
||||
}
|
||||
if (!squashed) {
|
||||
delete bi;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::condBranchUpdate(Addr branch_pc, bool taken,
|
||||
TageBranchInfo* bi, int nrand)
|
||||
{
|
||||
// TAGE UPDATE
|
||||
// try to allocate a new entries only if prediction was wrong
|
||||
bool longest_match_pred = false;
|
||||
bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
|
||||
if (bi->hitBank > 0) {
|
||||
// Manage the selection between longest matching and alternate
|
||||
// matching for "pseudo"-newly allocated longest matching entry
|
||||
longest_match_pred = bi->longestMatchPred;
|
||||
bool PseudoNewAlloc = bi->pseudoNewAlloc;
|
||||
// an entry is considered as newly allocated if its prediction
|
||||
// counter is weak
|
||||
if (PseudoNewAlloc) {
|
||||
if (longest_match_pred == taken) {
|
||||
alloc = false;
|
||||
}
|
||||
// if it was delivering the correct prediction, no need to
|
||||
// allocate new entry even if the overall prediction was false
|
||||
if (longest_match_pred != bi->altTaken) {
|
||||
ctrUpdate(useAltPredForNewlyAllocated,
|
||||
bi->altTaken == taken, useAltOnNaBits);
|
||||
}
|
||||
}
|
||||
tage->updateStats(taken, bi->tageBranchInfo);
|
||||
tage->condBranchUpdate(tid, branch_pc, taken, bi->tageBranchInfo,
|
||||
nrand, corrTarget);
|
||||
}
|
||||
|
||||
if (alloc) {
|
||||
// is there some "unuseful" entry to allocate
|
||||
uint8_t min = 1;
|
||||
for (int i = nHistoryTables; i > bi->hitBank; i--) {
|
||||
if (gtable[i][bi->tableIndices[i]].u < min) {
|
||||
min = gtable[i][bi->tableIndices[i]].u;
|
||||
}
|
||||
}
|
||||
tage->updateHistories(tid, branch_pc, taken, bi->tageBranchInfo, false,
|
||||
inst, corrTarget);
|
||||
|
||||
// we allocate an entry with a longer history
|
||||
// to avoid ping-pong, we do not choose systematically the next
|
||||
// entry, but among the 3 next entries
|
||||
int Y = nrand &
|
||||
((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
|
||||
int X = bi->hitBank + 1;
|
||||
if (Y & 1) {
|
||||
X++;
|
||||
if (Y & 2)
|
||||
X++;
|
||||
}
|
||||
// No entry available, forces one to be available
|
||||
if (min > 0) {
|
||||
gtable[X][bi->tableIndices[X]].u = 0;
|
||||
}
|
||||
|
||||
|
||||
//Allocate only one entry
|
||||
for (int i = X; i <= nHistoryTables; i++) {
|
||||
if ((gtable[i][bi->tableIndices[i]].u == 0)) {
|
||||
gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
|
||||
gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
//periodic reset of u: reset is not complete but bit by bit
|
||||
tCounter++;
|
||||
if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
|
||||
// reset least significant bit
|
||||
// most significant bit becomes least significant bit
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
|
||||
gtable[i][j].u = gtable[i][j].u >> 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bi->hitBank > 0) {
|
||||
DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n",
|
||||
bi->hitBank, bi->hitBankIndex, branch_pc);
|
||||
ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
|
||||
tagTableCounterBits);
|
||||
// if the provider entry is not certified to be useful also update
|
||||
// the alternate prediction
|
||||
if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
|
||||
if (bi->altBank > 0) {
|
||||
ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
|
||||
tagTableCounterBits);
|
||||
DPRINTF(Tage, "Updating tag table entry (%d,%d) for"
|
||||
" branch %lx\n", bi->hitBank, bi->hitBankIndex,
|
||||
branch_pc);
|
||||
}
|
||||
if (bi->altBank == 0) {
|
||||
baseUpdate(branch_pc, taken, bi);
|
||||
}
|
||||
}
|
||||
|
||||
// update the u counter
|
||||
if (bi->tagePred != bi->altTaken) {
|
||||
unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
|
||||
bi->tagePred == taken, tagTableUBits);
|
||||
}
|
||||
} else {
|
||||
baseUpdate(branch_pc, taken, bi);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
|
||||
{
|
||||
TageBranchInfo* bi = (TageBranchInfo*)(b);
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
// UPDATE HISTORIES
|
||||
bool pathbit = ((branch_pc >> instShiftAmt) & 1);
|
||||
//on a squash, return pointers to this and recompute indices.
|
||||
//update user history
|
||||
updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
|
||||
tHist.pathHist = (tHist.pathHist << 1) + pathbit;
|
||||
tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
|
||||
|
||||
bi->ptGhist = tHist.ptGhist;
|
||||
bi->pathHist = tHist.pathHist;
|
||||
//prepare next index and tag computations for user branchs
|
||||
for (int i = 1; i <= nHistoryTables; i++)
|
||||
{
|
||||
bi->ci[i] = tHist.computeIndices[i].comp;
|
||||
bi->ct0[i] = tHist.computeTags[0][i].comp;
|
||||
bi->ct1[i] = tHist.computeTags[1][i].comp;
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, "
|
||||
"path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
|
||||
tHist.ptGhist);
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::squash(ThreadID tid, bool taken, void *bp_history)
|
||||
{
|
||||
TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
|
||||
"pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
|
||||
tHist.pathHist = bi->pathHist;
|
||||
tHist.ptGhist = bi->ptGhist;
|
||||
tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
|
||||
tHist.gHist[0] = (taken ? 1 : 0);
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tHist.computeIndices[i].comp = bi->ci[i];
|
||||
tHist.computeTags[0][i].comp = bi->ct0[i];
|
||||
tHist.computeTags[1][i].comp = bi->ct1[i];
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
delete bi;
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::squash(ThreadID tid, void *bp_history)
|
||||
{
|
||||
TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
|
||||
DPRINTF(Tage, "Deleting branch info: %lx\n", bi->branchPC);
|
||||
TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
DPRINTF(Tage, "Deleting branch info: %lx\n", bi->tageBranchInfo->branchPC);
|
||||
delete bi;
|
||||
}
|
||||
|
||||
bool
|
||||
TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
|
||||
{
|
||||
TageBranchInfo *bi = new TageBranchInfo(*tage);
|
||||
b = (void*)(bi);
|
||||
return tage->tagePredict(tid, branch_pc, cond_branch, bi->tageBranchInfo);
|
||||
}
|
||||
|
||||
bool
|
||||
TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
|
||||
{
|
||||
bool retval = predict(tid, branch_pc, true, bp_history);
|
||||
|
||||
TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
|
||||
DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
|
||||
updateHistories(tid, branch_pc, retval, bp_history);
|
||||
assert(threadHistory[tid].gHist ==
|
||||
&threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
|
||||
|
||||
tage->updateHistories(tid, branch_pc, retval, bi->tageBranchInfo, true);
|
||||
|
||||
return retval;
|
||||
}
|
||||
@@ -581,19 +127,8 @@ TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
|
||||
void
|
||||
TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
|
||||
{
|
||||
TageBranchInfo* bi = (TageBranchInfo*) bp_history;
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc);
|
||||
assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
|
||||
tHist.gHist[0] = 0;
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tHist.computeIndices[i].comp = bi->ci[i];
|
||||
tHist.computeTags[0][i].comp = bi->ct0[i];
|
||||
tHist.computeTags[1][i].comp = bi->ct1[i];
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
tage->btbUpdate(tid, branch_pc, bi->tageBranchInfo);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -601,122 +136,8 @@ TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
|
||||
{
|
||||
DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc);
|
||||
predict(tid, br_pc, false, bp_history);
|
||||
updateHistories(tid, br_pc, true, bp_history);
|
||||
assert(threadHistory[tid].gHist ==
|
||||
&threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::updateStats(bool taken, TageBranchInfo* bi)
|
||||
{
|
||||
if (taken == bi->tagePred) {
|
||||
// correct prediction
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ONLY: tageBimodalProviderCorrect++; break;
|
||||
case TAGE_LONGEST_MATCH: tageLongestMatchProviderCorrect++; break;
|
||||
case BIMODAL_ALT_MATCH: bimodalAltMatchProviderCorrect++; break;
|
||||
case TAGE_ALT_MATCH: tageAltMatchProviderCorrect++; break;
|
||||
}
|
||||
} else {
|
||||
// wrong prediction
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ONLY: tageBimodalProviderWrong++; break;
|
||||
case TAGE_LONGEST_MATCH:
|
||||
tageLongestMatchProviderWrong++;
|
||||
if (bi->altTaken == taken) {
|
||||
tageAltMatchProviderWouldHaveHit++;
|
||||
}
|
||||
break;
|
||||
case BIMODAL_ALT_MATCH:
|
||||
bimodalAltMatchProviderWrong++;
|
||||
break;
|
||||
case TAGE_ALT_MATCH:
|
||||
tageAltMatchProviderWrong++;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ALT_MATCH:
|
||||
case TAGE_ALT_MATCH:
|
||||
if (bi->longestMatchPred == taken) {
|
||||
tageLongestMatchProviderWouldHaveHit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (bi->provider) {
|
||||
case TAGE_LONGEST_MATCH:
|
||||
case TAGE_ALT_MATCH:
|
||||
tageLongestMatchProvider[bi->hitBank]++;
|
||||
tageAltMatchProvider[bi->altBank]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGE::regStats()
|
||||
{
|
||||
BPredUnit::regStats();
|
||||
|
||||
tageLongestMatchProviderCorrect
|
||||
.name(name() + ".tageLongestMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Longest Match is the provider and "
|
||||
"the prediction is correct");
|
||||
|
||||
tageAltMatchProviderCorrect
|
||||
.name(name() + ".tageAltMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Alt Match is the provider and "
|
||||
"the prediction is correct");
|
||||
|
||||
bimodalAltMatchProviderCorrect
|
||||
.name(name() + ".bimodalAltMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Alt Match is the bimodal and it is the "
|
||||
"provider and the prediction is correct");
|
||||
|
||||
tageBimodalProviderCorrect
|
||||
.name(name() + ".tageBimodalProviderCorrect")
|
||||
.desc("Number of times there are no hits on the TAGE tables "
|
||||
"and the bimodal prediction is correct");
|
||||
|
||||
tageLongestMatchProviderWrong
|
||||
.name(name() + ".tageLongestMatchProviderWrong")
|
||||
.desc("Number of times TAGE Longest Match is the provider and "
|
||||
"the prediction is wrong");
|
||||
|
||||
tageAltMatchProviderWrong
|
||||
.name(name() + ".tageAltMatchProviderWrong")
|
||||
.desc("Number of times TAGE Alt Match is the provider and "
|
||||
"the prediction is wrong");
|
||||
|
||||
bimodalAltMatchProviderWrong
|
||||
.name(name() + ".bimodalAltMatchProviderWrong")
|
||||
.desc("Number of times TAGE Alt Match is the bimodal and it is the "
|
||||
"provider and the prediction is wrong");
|
||||
|
||||
tageBimodalProviderWrong
|
||||
.name(name() + ".tageBimodalProviderWrong")
|
||||
.desc("Number of times there are no hits on the TAGE tables "
|
||||
"and the bimodal prediction is wrong");
|
||||
|
||||
tageAltMatchProviderWouldHaveHit
|
||||
.name(name() + ".tageAltMatchProviderWouldHaveHit")
|
||||
.desc("Number of times TAGE Longest Match is the provider, "
|
||||
"the prediction is wrong and Alt Match prediction was correct");
|
||||
|
||||
tageLongestMatchProviderWouldHaveHit
|
||||
.name(name() + ".tageLongestMatchProviderWouldHaveHit")
|
||||
.desc("Number of times TAGE Alt Match is the provider, the "
|
||||
"prediction is wrong and Longest Match prediction was correct");
|
||||
|
||||
tageLongestMatchProvider
|
||||
.init(nHistoryTables + 1)
|
||||
.name(name() + ".tageLongestMatchProvider")
|
||||
.desc("TAGE provider for longest match");
|
||||
|
||||
tageAltMatchProvider
|
||||
.init(nHistoryTables + 1)
|
||||
.name(name() + ".tageAltMatchProvider")
|
||||
.desc("TAGE provider for alt match");
|
||||
TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
|
||||
tage->updateHistories(tid, br_pc, true, bi->tageBranchInfo, true);
|
||||
}
|
||||
|
||||
TAGE*
|
||||
|
||||
@@ -55,11 +55,30 @@
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "cpu/pred/bpred_unit.hh"
|
||||
#include "cpu/pred/tage_base.hh"
|
||||
#include "params/TAGE.hh"
|
||||
|
||||
class TAGE: public BPredUnit
|
||||
{
|
||||
protected:
|
||||
TAGEBase *tage;
|
||||
|
||||
struct TageBranchInfo {
|
||||
TAGEBase::BranchInfo *tageBranchInfo;
|
||||
|
||||
TageBranchInfo(TAGEBase &tage) : tageBranchInfo(tage.makeBranchInfo())
|
||||
{}
|
||||
|
||||
virtual ~TageBranchInfo()
|
||||
{
|
||||
delete tageBranchInfo;
|
||||
}
|
||||
};
|
||||
|
||||
virtual bool predict(ThreadID tid, Addr branch_pc, bool cond_branch,
|
||||
void* &b);
|
||||
public:
|
||||
|
||||
TAGE(const TAGEParams *params);
|
||||
|
||||
// Base class methods.
|
||||
@@ -67,346 +86,10 @@ class TAGE: public BPredUnit
|
||||
bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history) override;
|
||||
void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history) override;
|
||||
void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed) override;
|
||||
bool squashed, const StaticInstPtr & inst,
|
||||
Addr corrTarget) override;
|
||||
virtual void squash(ThreadID tid, void *bp_history) override;
|
||||
unsigned getGHR(ThreadID tid, void *bp_history) const override;
|
||||
|
||||
virtual void regStats() override;
|
||||
|
||||
protected:
|
||||
// Prediction Structures
|
||||
|
||||
// Tage Entry
|
||||
struct TageEntry
|
||||
{
|
||||
int8_t ctr;
|
||||
uint16_t tag;
|
||||
uint8_t u;
|
||||
TageEntry() : ctr(0), tag(0), u(0) { }
|
||||
};
|
||||
|
||||
// Folded History Table - compressed history
|
||||
// to mix with instruction PC to index partially
|
||||
// tagged tables.
|
||||
struct FoldedHistory
|
||||
{
|
||||
unsigned comp;
|
||||
int compLength;
|
||||
int origLength;
|
||||
int outpoint;
|
||||
|
||||
void init(int original_length, int compressed_length)
|
||||
{
|
||||
comp = 0;
|
||||
origLength = original_length;
|
||||
compLength = compressed_length;
|
||||
outpoint = original_length % compressed_length;
|
||||
}
|
||||
|
||||
void update(uint8_t * h)
|
||||
{
|
||||
comp = (comp << 1) | h[0];
|
||||
comp ^= h[origLength] << outpoint;
|
||||
comp ^= (comp >> compLength);
|
||||
comp &= (ULL(1) << compLength) - 1;
|
||||
}
|
||||
};
|
||||
|
||||
// provider type
|
||||
enum {
|
||||
BIMODAL_ONLY = 0,
|
||||
TAGE_LONGEST_MATCH,
|
||||
BIMODAL_ALT_MATCH,
|
||||
TAGE_ALT_MATCH,
|
||||
LAST_TAGE_PROVIDER_TYPE = TAGE_ALT_MATCH
|
||||
};
|
||||
|
||||
// Primary branch history entry
|
||||
struct TageBranchInfo
|
||||
{
|
||||
int pathHist;
|
||||
int ptGhist;
|
||||
int hitBank;
|
||||
int hitBankIndex;
|
||||
int altBank;
|
||||
int altBankIndex;
|
||||
int bimodalIndex;
|
||||
|
||||
bool tagePred;
|
||||
bool altTaken;
|
||||
bool condBranch;
|
||||
bool longestMatchPred;
|
||||
bool pseudoNewAlloc;
|
||||
Addr branchPC;
|
||||
|
||||
// Pointer to dynamically allocated storage
|
||||
// to save table indices and folded histories.
|
||||
// To do one call to new instead of five.
|
||||
int *storage;
|
||||
|
||||
// Pointers to actual saved array within the dynamically
|
||||
// allocated storage.
|
||||
int *tableIndices;
|
||||
int *tableTags;
|
||||
int *ci;
|
||||
int *ct0;
|
||||
int *ct1;
|
||||
|
||||
// for stats purposes
|
||||
unsigned provider;
|
||||
|
||||
TageBranchInfo(int sz)
|
||||
: pathHist(0), ptGhist(0),
|
||||
hitBank(0), hitBankIndex(0),
|
||||
altBank(0), altBankIndex(0),
|
||||
bimodalIndex(0),
|
||||
tagePred(false), altTaken(false),
|
||||
condBranch(false), longestMatchPred(false),
|
||||
pseudoNewAlloc(false), branchPC(0),
|
||||
provider(-1)
|
||||
{
|
||||
storage = new int [sz * 5];
|
||||
tableIndices = storage;
|
||||
tableTags = storage + sz;
|
||||
ci = tableTags + sz;
|
||||
ct0 = ci + sz;
|
||||
ct1 = ct0 + sz;
|
||||
}
|
||||
|
||||
virtual ~TageBranchInfo()
|
||||
{
|
||||
delete[] storage;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Computes the index used to access the
|
||||
* bimodal table.
|
||||
* @param pc_in The unshifted branch PC.
|
||||
*/
|
||||
int bindex(Addr pc_in) const;
|
||||
|
||||
/**
|
||||
* Computes the index used to access a
|
||||
* partially tagged table.
|
||||
* @param tid The thread ID used to select the
|
||||
* global histories to use.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
inline int gindex(ThreadID tid, Addr pc, int bank) const;
|
||||
|
||||
/**
|
||||
* Utility function to shuffle the path history
|
||||
* depending on which tagged table we are accessing.
|
||||
* @param phist The path history.
|
||||
* @param size Number of path history bits to use.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
int F(int phist, int size, int bank) const;
|
||||
|
||||
/**
|
||||
* Computes the partial tag of a tagged table.
|
||||
* @param tid the thread ID used to select the
|
||||
* global histories to use.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
|
||||
|
||||
/**
|
||||
* Updates a direction counter based on the actual
|
||||
* branch outcome.
|
||||
* @param ctr Reference to counter to update.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param nbits Counter width.
|
||||
*/
|
||||
void ctrUpdate(int8_t & ctr, bool taken, int nbits);
|
||||
|
||||
/**
|
||||
* Updates an unsigned counter based on up/down parameter
|
||||
* @param ctr Reference to counter to update.
|
||||
* @param up Boolean indicating if the counter is incremented/decremented
|
||||
* If true it is incremented, if false it is decremented
|
||||
* @param nbits Counter width.
|
||||
*/
|
||||
void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits);
|
||||
|
||||
/**
|
||||
* Get a branch prediction from the bimodal
|
||||
* predictor.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bi Pointer to information on the
|
||||
* prediction.
|
||||
*/
|
||||
bool getBimodePred(Addr pc, TageBranchInfo* bi) const;
|
||||
|
||||
/**
|
||||
* Updates the bimodal predictor.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param taken The actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
void baseUpdate(Addr pc, bool taken, TageBranchInfo* bi);
|
||||
|
||||
/**
|
||||
* (Speculatively) updates the global branch history.
|
||||
* @param h Reference to pointer to global branch history.
|
||||
* @param dir (Predicted) outcome to update the histories
|
||||
* with.
|
||||
* @param tab
|
||||
* @param PT Reference to path history.
|
||||
*/
|
||||
void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
|
||||
|
||||
/**
|
||||
* Get a branch prediction from TAGE. *NOT* an override of
|
||||
* BpredUnit::predict().
|
||||
* @param tid The thread ID to select the global
|
||||
* histories to use.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param cond_branch True if the branch is conditional.
|
||||
* @param b Reference to wrapping pointer to allow storing
|
||||
* derived class prediction information in the base class.
|
||||
*/
|
||||
virtual bool predict(
|
||||
ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
|
||||
|
||||
/**
|
||||
* Update TAGE. Called at execute to repair histories on a misprediction
|
||||
* and at commit to update the tables.
|
||||
* @param tid The thread ID to select the global
|
||||
* histories to use.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
void update(ThreadID tid, Addr branch_pc, bool taken, TageBranchInfo* bi);
|
||||
|
||||
/**
|
||||
* (Speculatively) updates global histories (path and direction).
|
||||
* Also recomputes compressed (folded) histories based on the
|
||||
* branch direction.
|
||||
* @param tid The thread ID to select the histories
|
||||
* to update.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken (Predicted) branch direction.
|
||||
* @param b Wrapping pointer to TageBranchInfo (to allow
|
||||
* storing derived class prediction information in the
|
||||
* base class).
|
||||
*/
|
||||
void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b);
|
||||
|
||||
/**
|
||||
* Restores speculatively updated path and direction histories.
|
||||
* Also recomputes compressed (folded) histories based on the
|
||||
* correct branch outcome.
|
||||
* This version of squash() is called once on a branch misprediction.
|
||||
* @param tid The Thread ID to select the histories to rollback.
|
||||
* @param taken The correct branch outcome.
|
||||
* @param bp_history Wrapping pointer to TageBranchInfo (to allow
|
||||
* storing derived class prediction information in the
|
||||
* base class).
|
||||
* @post bp_history points to valid memory.
|
||||
*/
|
||||
virtual void squash(ThreadID tid, bool taken, void *bp_history);
|
||||
|
||||
/**
|
||||
* Update TAGE for conditional branches.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
* @nrand Random int number from 0 to 3
|
||||
*/
|
||||
virtual void condBranchUpdate(
|
||||
Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand);
|
||||
|
||||
/**
|
||||
* TAGE prediction called from TAGE::predict
|
||||
* @param tid The thread ID to select the global
|
||||
* histories to use.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param cond_branch True if the branch is conditional.
|
||||
* @param bi Pointer to the TageBranchInfo
|
||||
*/
|
||||
bool tagePredict(
|
||||
ThreadID tid, Addr branch_pc, bool cond_branch, TageBranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Update the stats
|
||||
* @param taken Actual branch outcome
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
virtual void updateStats(bool taken, TageBranchInfo* bi);
|
||||
|
||||
const unsigned logRatioBiModalHystEntries;
|
||||
const unsigned nHistoryTables;
|
||||
const unsigned tagTableCounterBits;
|
||||
const unsigned tagTableUBits;
|
||||
const unsigned histBufferSize;
|
||||
const unsigned minHist;
|
||||
const unsigned maxHist;
|
||||
const unsigned pathHistBits;
|
||||
|
||||
const std::vector<unsigned> tagTableTagWidths;
|
||||
const std::vector<int> logTagTableSizes;
|
||||
|
||||
std::vector<bool> btablePrediction;
|
||||
std::vector<bool> btableHysteresis;
|
||||
TageEntry **gtable;
|
||||
|
||||
// Keep per-thread histories to
|
||||
// support SMT.
|
||||
struct ThreadHistory {
|
||||
// Speculative path history
|
||||
// (LSB of branch address)
|
||||
int pathHist;
|
||||
|
||||
// Speculative branch direction
|
||||
// history (circular buffer)
|
||||
// @TODO Convert to std::vector<bool>
|
||||
uint8_t *globalHistory;
|
||||
|
||||
// Pointer to most recent branch outcome
|
||||
uint8_t* gHist;
|
||||
|
||||
// Index to most recent branch outcome
|
||||
int ptGhist;
|
||||
|
||||
// Speculative folded histories.
|
||||
FoldedHistory *computeIndices;
|
||||
FoldedHistory *computeTags[2];
|
||||
};
|
||||
|
||||
std::vector<ThreadHistory> threadHistory;
|
||||
|
||||
int *histLengths;
|
||||
int *tableIndices;
|
||||
int *tableTags;
|
||||
|
||||
int8_t useAltPredForNewlyAllocated;
|
||||
uint64_t tCounter;
|
||||
uint64_t logUResetPeriod;
|
||||
unsigned useAltOnNaBits;
|
||||
|
||||
// stats
|
||||
Stats::Scalar tageLongestMatchProviderCorrect;
|
||||
Stats::Scalar tageAltMatchProviderCorrect;
|
||||
Stats::Scalar bimodalAltMatchProviderCorrect;
|
||||
Stats::Scalar tageBimodalProviderCorrect;
|
||||
Stats::Scalar tageLongestMatchProviderWrong;
|
||||
Stats::Scalar tageAltMatchProviderWrong;
|
||||
Stats::Scalar bimodalAltMatchProviderWrong;
|
||||
Stats::Scalar tageBimodalProviderWrong;
|
||||
Stats::Scalar tageAltMatchProviderWouldHaveHit;
|
||||
Stats::Scalar tageLongestMatchProviderWouldHaveHit;
|
||||
|
||||
Stats::Vector tageLongestMatchProvider;
|
||||
Stats::Vector tageAltMatchProvider;
|
||||
};
|
||||
|
||||
#endif // __CPU_PRED_TAGE
|
||||
|
||||
803
src/cpu/pred/tage_base.cc
Normal file
803
src/cpu/pred/tage_base.cc
Normal file
@@ -0,0 +1,803 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The University of Wisconsin
|
||||
*
|
||||
* Copyright (c) 2006 INRIA (Institut National de Recherche en
|
||||
* Informatique et en Automatique / French National Research Institute
|
||||
* for Computer Science and Applied Mathematics)
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
|
||||
* from André Seznec's code.
|
||||
*/
|
||||
|
||||
/* @file
|
||||
* Implementation of a TAGE branch predictor
|
||||
*/
|
||||
|
||||
#include "cpu/pred/tage_base.hh"
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "base/logging.hh"
|
||||
#include "base/random.hh"
|
||||
#include "debug/Fetch.hh"
|
||||
#include "debug/Tage.hh"
|
||||
|
||||
TAGEBase::TAGEBase(const TAGEBaseParams *p)
|
||||
: SimObject(p),
|
||||
logRatioBiModalHystEntries(p->logRatioBiModalHystEntries),
|
||||
nHistoryTables(p->nHistoryTables),
|
||||
tagTableCounterBits(p->tagTableCounterBits),
|
||||
tagTableUBits(p->tagTableUBits),
|
||||
histBufferSize(p->histBufferSize),
|
||||
minHist(p->minHist),
|
||||
maxHist(p->maxHist),
|
||||
pathHistBits(p->pathHistBits),
|
||||
tagTableTagWidths(p->tagTableTagWidths),
|
||||
logTagTableSizes(p->logTagTableSizes),
|
||||
threadHistory(p->numThreads),
|
||||
logUResetPeriod(p->logUResetPeriod),
|
||||
numUseAltOnNa(p->numUseAltOnNa),
|
||||
useAltOnNaBits(p->useAltOnNaBits),
|
||||
maxNumAlloc(p->maxNumAlloc),
|
||||
noSkip(p->noSkip),
|
||||
speculativeHistUpdate(p->speculativeHistUpdate),
|
||||
instShiftAmt(p->instShiftAmt)
|
||||
{
|
||||
if (noSkip.empty()) {
|
||||
// Set all the table to enabled by default
|
||||
noSkip.resize(nHistoryTables + 1, true);
|
||||
}
|
||||
}
|
||||
|
||||
TAGEBase::BranchInfo*
|
||||
TAGEBase::makeBranchInfo() {
|
||||
return new BranchInfo(*this);
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::init()
|
||||
{
|
||||
// Current method for periodically resetting the u counter bits only
|
||||
// works for 1 or 2 bits
|
||||
// Also make sure that it is not 0
|
||||
assert(tagTableUBits <= 2 && (tagTableUBits > 0));
|
||||
|
||||
// we use int type for the path history, so it cannot be more than
|
||||
// its size
|
||||
assert(pathHistBits <= (sizeof(int)*8));
|
||||
|
||||
// initialize the counter to half of the period
|
||||
assert(logUResetPeriod != 0);
|
||||
tCounter = ULL(1) << (logUResetPeriod - 1);
|
||||
|
||||
assert(histBufferSize > maxHist * 2);
|
||||
|
||||
useAltPredForNewlyAllocated.resize(numUseAltOnNa, 0);
|
||||
|
||||
for (auto& history : threadHistory) {
|
||||
history.pathHist = 0;
|
||||
history.globalHistory = new uint8_t[histBufferSize];
|
||||
history.gHist = history.globalHistory;
|
||||
memset(history.gHist, 0, histBufferSize);
|
||||
history.ptGhist = 0;
|
||||
}
|
||||
|
||||
histLengths = new int [nHistoryTables+1];
|
||||
|
||||
calculateParameters();
|
||||
|
||||
assert(tagTableTagWidths.size() == (nHistoryTables+1));
|
||||
assert(logTagTableSizes.size() == (nHistoryTables+1));
|
||||
|
||||
// First entry is for the Bimodal table and it is untagged in this
|
||||
// implementation
|
||||
assert(tagTableTagWidths[0] == 0);
|
||||
|
||||
for (auto& history : threadHistory) {
|
||||
history.computeIndices = new FoldedHistory[nHistoryTables+1];
|
||||
history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
|
||||
history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
|
||||
|
||||
initFoldedHistories(history);
|
||||
}
|
||||
|
||||
const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
|
||||
btablePrediction.resize(bimodalTableSize, false);
|
||||
btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
|
||||
true);
|
||||
|
||||
gtable = new TageEntry*[nHistoryTables + 1];
|
||||
buildTageTables();
|
||||
|
||||
tableIndices = new int [nHistoryTables+1];
|
||||
tableTags = new int [nHistoryTables+1];
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::initFoldedHistories(ThreadHistory & history)
|
||||
{
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
history.computeIndices[i].init(
|
||||
histLengths[i], (logTagTableSizes[i]));
|
||||
history.computeTags[0][i].init(
|
||||
history.computeIndices[i].origLength, tagTableTagWidths[i]);
|
||||
history.computeTags[1][i].init(
|
||||
history.computeIndices[i].origLength, tagTableTagWidths[i]-1);
|
||||
DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
|
||||
histLengths[i], logTagTableSizes[i], tagTableTagWidths[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::buildTageTables()
|
||||
{
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::calculateParameters()
|
||||
{
|
||||
histLengths[1] = minHist;
|
||||
histLengths[nHistoryTables] = maxHist;
|
||||
|
||||
for (int i = 2; i <= nHistoryTables; i++) {
|
||||
histLengths[i] = (int) (((double) minHist *
|
||||
pow ((double) (maxHist) / (double) minHist,
|
||||
(double) (i - 1) / (double) ((nHistoryTables- 1))))
|
||||
+ 0.5);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::btbUpdate(ThreadID tid, Addr branch_pc, BranchInfo* &bi)
|
||||
{
|
||||
if (speculativeHistUpdate) {
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc);
|
||||
assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
|
||||
tHist.gHist[0] = 0;
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tHist.computeIndices[i].comp = bi->ci[i];
|
||||
tHist.computeTags[0][i].comp = bi->ct0[i];
|
||||
tHist.computeTags[1][i].comp = bi->ct1[i];
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
TAGEBase::bindex(Addr pc_in) const
|
||||
{
|
||||
return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) - 1));
|
||||
}
|
||||
|
||||
int
|
||||
TAGEBase::F(int A, int size, int bank) const
|
||||
{
|
||||
int A1, A2;
|
||||
|
||||
A = A & ((ULL(1) << size) - 1);
|
||||
A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
|
||||
A2 = (A >> logTagTableSizes[bank]);
|
||||
A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
|
||||
+ (A2 >> (logTagTableSizes[bank] - bank));
|
||||
A = A1 ^ A2;
|
||||
A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
|
||||
+ (A >> (logTagTableSizes[bank] - bank));
|
||||
return (A);
|
||||
}
|
||||
|
||||
// gindex computes a full hash of pc, ghist and pathHist
|
||||
int
|
||||
TAGEBase::gindex(ThreadID tid, Addr pc, int bank) const
|
||||
{
|
||||
int index;
|
||||
int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
|
||||
histLengths[bank];
|
||||
const unsigned int shiftedPc = pc >> instShiftAmt;
|
||||
index =
|
||||
shiftedPc ^
|
||||
(shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
|
||||
threadHistory[tid].computeIndices[bank].comp ^
|
||||
F(threadHistory[tid].pathHist, hlen, bank);
|
||||
|
||||
return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
|
||||
}
|
||||
|
||||
|
||||
// Tag computation
|
||||
uint16_t
|
||||
TAGEBase::gtag(ThreadID tid, Addr pc, int bank) const
|
||||
{
|
||||
int tag = (pc >> instShiftAmt) ^
|
||||
threadHistory[tid].computeTags[0][bank].comp ^
|
||||
(threadHistory[tid].computeTags[1][bank].comp << 1);
|
||||
|
||||
return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
|
||||
}
|
||||
|
||||
|
||||
// Up-down saturating counter
|
||||
template<typename T>
|
||||
void
|
||||
TAGEBase::ctrUpdate(T & ctr, bool taken, int nbits)
|
||||
{
|
||||
assert(nbits <= sizeof(T) << 3);
|
||||
if (taken) {
|
||||
if (ctr < ((1 << (nbits - 1)) - 1))
|
||||
ctr++;
|
||||
} else {
|
||||
if (ctr > -(1 << (nbits - 1)))
|
||||
ctr--;
|
||||
}
|
||||
}
|
||||
|
||||
// int8_t and int versions of this function may be needed
|
||||
template void TAGEBase::ctrUpdate(int8_t & ctr, bool taken, int nbits);
|
||||
template void TAGEBase::ctrUpdate(int & ctr, bool taken, int nbits);
|
||||
|
||||
// Up-down unsigned saturating counter
|
||||
void
|
||||
TAGEBase::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
|
||||
{
|
||||
assert(nbits <= sizeof(uint8_t) << 3);
|
||||
if (up) {
|
||||
if (ctr < ((1 << nbits) - 1))
|
||||
ctr++;
|
||||
} else {
|
||||
if (ctr)
|
||||
ctr--;
|
||||
}
|
||||
}
|
||||
|
||||
// Bimodal prediction
|
||||
bool
|
||||
TAGEBase::getBimodePred(Addr pc, BranchInfo* bi) const
|
||||
{
|
||||
return btablePrediction[bi->bimodalIndex];
|
||||
}
|
||||
|
||||
|
||||
// Update the bimodal predictor: a hysteresis bit is shared among N prediction
|
||||
// bits (N = 2 ^ logRatioBiModalHystEntries)
|
||||
void
|
||||
TAGEBase::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
|
||||
{
|
||||
int inter = (btablePrediction[bi->bimodalIndex] << 1)
|
||||
+ btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
|
||||
if (taken) {
|
||||
if (inter < 3)
|
||||
inter++;
|
||||
} else if (inter > 0) {
|
||||
inter--;
|
||||
}
|
||||
const bool pred = inter >> 1;
|
||||
const bool hyst = inter & 1;
|
||||
btablePrediction[bi->bimodalIndex] = pred;
|
||||
btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] = hyst;
|
||||
DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred, hyst);
|
||||
}
|
||||
|
||||
// shifting the global history: we manage the history in a big table in order
|
||||
// to reduce simulation time
|
||||
void
|
||||
TAGEBase::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
|
||||
{
|
||||
if (pt == 0) {
|
||||
DPRINTF(Tage, "Rolling over the histories\n");
|
||||
// Copy beginning of globalHistoryBuffer to end, such that
|
||||
// the last maxHist outcomes are still reachable
|
||||
// through pt[0 .. maxHist - 1].
|
||||
for (int i = 0; i < maxHist; i++)
|
||||
tab[histBufferSize - maxHist + i] = tab[i];
|
||||
pt = histBufferSize - maxHist;
|
||||
h = &tab[pt];
|
||||
}
|
||||
pt--;
|
||||
h--;
|
||||
h[0] = (dir) ? 1 : 0;
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::calculateIndicesAndTags(ThreadID tid, Addr branch_pc,
|
||||
BranchInfo* bi)
|
||||
{
|
||||
// computes the table addresses and the partial tags
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tableIndices[i] = gindex(tid, branch_pc, i);
|
||||
bi->tableIndices[i] = tableIndices[i];
|
||||
tableTags[i] = gtag(tid, branch_pc, i);
|
||||
bi->tableTags[i] = tableTags[i];
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
TAGEBase::getUseAltIdx(BranchInfo* bi)
|
||||
{
|
||||
// There is only 1 counter on the base TAGE implementation
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
TAGEBase::tagePredict(ThreadID tid, Addr branch_pc,
|
||||
bool cond_branch, BranchInfo* bi)
|
||||
{
|
||||
Addr pc = branch_pc;
|
||||
bool pred_taken = true;
|
||||
|
||||
if (cond_branch) {
|
||||
// TAGE prediction
|
||||
|
||||
calculateIndicesAndTags(tid, pc, bi);
|
||||
|
||||
bi->bimodalIndex = bindex(pc);
|
||||
|
||||
bi->hitBank = 0;
|
||||
bi->altBank = 0;
|
||||
//Look for the bank with longest matching history
|
||||
for (int i = nHistoryTables; i > 0; i--) {
|
||||
if (noSkip[i] &&
|
||||
gtable[i][tableIndices[i]].tag == tableTags[i]) {
|
||||
bi->hitBank = i;
|
||||
bi->hitBankIndex = tableIndices[bi->hitBank];
|
||||
break;
|
||||
}
|
||||
}
|
||||
//Look for the alternate bank
|
||||
for (int i = bi->hitBank - 1; i > 0; i--) {
|
||||
if (noSkip[i] &&
|
||||
gtable[i][tableIndices[i]].tag == tableTags[i]) {
|
||||
bi->altBank = i;
|
||||
bi->altBankIndex = tableIndices[bi->altBank];
|
||||
break;
|
||||
}
|
||||
}
|
||||
//computes the prediction and the alternate prediction
|
||||
if (bi->hitBank > 0) {
|
||||
if (bi->altBank > 0) {
|
||||
bi->altTaken =
|
||||
gtable[bi->altBank][tableIndices[bi->altBank]].ctr >= 0;
|
||||
extraAltCalc(bi);
|
||||
}else {
|
||||
bi->altTaken = getBimodePred(pc, bi);
|
||||
}
|
||||
|
||||
bi->longestMatchPred =
|
||||
gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
|
||||
bi->pseudoNewAlloc =
|
||||
abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <= 1;
|
||||
|
||||
//if the entry is recognized as a newly allocated entry and
|
||||
//useAltPredForNewlyAllocated is positive use the alternate
|
||||
//prediction
|
||||
if ((useAltPredForNewlyAllocated[getUseAltIdx(bi)] < 0) ||
|
||||
! bi->pseudoNewAlloc) {
|
||||
bi->tagePred = bi->longestMatchPred;
|
||||
bi->provider = TAGE_LONGEST_MATCH;
|
||||
} else {
|
||||
bi->tagePred = bi->altTaken;
|
||||
bi->provider = bi->altBank ? TAGE_ALT_MATCH
|
||||
: BIMODAL_ALT_MATCH;
|
||||
}
|
||||
} else {
|
||||
bi->altTaken = getBimodePred(pc, bi);
|
||||
bi->tagePred = bi->altTaken;
|
||||
bi->longestMatchPred = bi->altTaken;
|
||||
bi->provider = BIMODAL_ONLY;
|
||||
}
|
||||
//end TAGE prediction
|
||||
|
||||
pred_taken = (bi->tagePred);
|
||||
DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d, altPred:%d\n",
|
||||
branch_pc, pred_taken, bi->tagePred, bi->altTaken);
|
||||
}
|
||||
bi->branchPC = branch_pc;
|
||||
bi->condBranch = cond_branch;
|
||||
return pred_taken;
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::adjustAlloc(bool & alloc, bool taken)
|
||||
{
|
||||
// Nothing for this base class implementation
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::handleAllocAndUReset(bool alloc, bool taken, BranchInfo* bi,
|
||||
int nrand)
|
||||
{
|
||||
if (alloc) {
|
||||
// is there some "unuseful" entry to allocate
|
||||
uint8_t min = 1;
|
||||
for (int i = nHistoryTables; i > bi->hitBank; i--) {
|
||||
if (gtable[i][bi->tableIndices[i]].u < min) {
|
||||
min = gtable[i][bi->tableIndices[i]].u;
|
||||
}
|
||||
}
|
||||
|
||||
// we allocate an entry with a longer history
|
||||
// to avoid ping-pong, we do not choose systematically the next
|
||||
// entry, but among the 3 next entries
|
||||
int Y = nrand &
|
||||
((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
|
||||
int X = bi->hitBank + 1;
|
||||
if (Y & 1) {
|
||||
X++;
|
||||
if (Y & 2)
|
||||
X++;
|
||||
}
|
||||
// No entry available, forces one to be available
|
||||
if (min > 0) {
|
||||
gtable[X][bi->tableIndices[X]].u = 0;
|
||||
}
|
||||
|
||||
|
||||
//Allocate entries
|
||||
unsigned numAllocated = 0;
|
||||
for (int i = X; i <= nHistoryTables; i++) {
|
||||
if ((gtable[i][bi->tableIndices[i]].u == 0)) {
|
||||
gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
|
||||
gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
|
||||
++numAllocated;
|
||||
if (numAllocated == maxNumAlloc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tCounter++;
|
||||
|
||||
handleUReset();
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::handleUReset()
|
||||
{
|
||||
//periodic reset of u: reset is not complete but bit by bit
|
||||
if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
|
||||
// reset least significant bit
|
||||
// most significant bit becomes least significant bit
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
|
||||
resetUctr(gtable[i][j].u);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::resetUctr(uint8_t & u)
|
||||
{
|
||||
u >>= 1;
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::condBranchUpdate(ThreadID tid, Addr branch_pc, bool taken,
|
||||
BranchInfo* bi, int nrand, Addr corrTarget)
|
||||
{
|
||||
// TAGE UPDATE
|
||||
// try to allocate a new entries only if prediction was wrong
|
||||
bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
|
||||
if (bi->hitBank > 0) {
|
||||
// Manage the selection between longest matching and alternate
|
||||
// matching for "pseudo"-newly allocated longest matching entry
|
||||
bool PseudoNewAlloc = bi->pseudoNewAlloc;
|
||||
// an entry is considered as newly allocated if its prediction
|
||||
// counter is weak
|
||||
if (PseudoNewAlloc) {
|
||||
if (bi->longestMatchPred == taken) {
|
||||
alloc = false;
|
||||
}
|
||||
// if it was delivering the correct prediction, no need to
|
||||
// allocate new entry even if the overall prediction was false
|
||||
if (bi->longestMatchPred != bi->altTaken) {
|
||||
ctrUpdate(useAltPredForNewlyAllocated[getUseAltIdx(bi)],
|
||||
bi->altTaken == taken, useAltOnNaBits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
adjustAlloc(alloc, taken);
|
||||
|
||||
handleAllocAndUReset(alloc, taken, bi, nrand);
|
||||
|
||||
handleTAGEUpdate(branch_pc, taken, bi);
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::handleTAGEUpdate(Addr branch_pc, bool taken, BranchInfo* bi)
|
||||
{
|
||||
if (bi->hitBank > 0) {
|
||||
DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n",
|
||||
bi->hitBank, bi->hitBankIndex, branch_pc);
|
||||
ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
|
||||
tagTableCounterBits);
|
||||
// if the provider entry is not certified to be useful also update
|
||||
// the alternate prediction
|
||||
if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
|
||||
if (bi->altBank > 0) {
|
||||
ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
|
||||
tagTableCounterBits);
|
||||
DPRINTF(Tage, "Updating tag table entry (%d,%d) for"
|
||||
" branch %lx\n", bi->hitBank, bi->hitBankIndex,
|
||||
branch_pc);
|
||||
}
|
||||
if (bi->altBank == 0) {
|
||||
baseUpdate(branch_pc, taken, bi);
|
||||
}
|
||||
}
|
||||
|
||||
// update the u counter
|
||||
if (bi->tagePred != bi->altTaken) {
|
||||
unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
|
||||
bi->tagePred == taken, tagTableUBits);
|
||||
}
|
||||
} else {
|
||||
baseUpdate(branch_pc, taken, bi);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::updateHistories(ThreadID tid, Addr branch_pc, bool taken,
|
||||
BranchInfo* bi, bool speculative,
|
||||
const StaticInstPtr &inst, Addr target)
|
||||
{
|
||||
if (speculative != speculativeHistUpdate) {
|
||||
return;
|
||||
}
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
// UPDATE HISTORIES
|
||||
bool pathbit = ((branch_pc >> instShiftAmt) & 1);
|
||||
//on a squash, return pointers to this and recompute indices.
|
||||
//update user history
|
||||
updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
|
||||
tHist.pathHist = (tHist.pathHist << 1) + pathbit;
|
||||
tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
|
||||
|
||||
if (speculative) {
|
||||
bi->ptGhist = tHist.ptGhist;
|
||||
bi->pathHist = tHist.pathHist;
|
||||
}
|
||||
|
||||
//prepare next index and tag computations for user branchs
|
||||
for (int i = 1; i <= nHistoryTables; i++)
|
||||
{
|
||||
if (speculative) {
|
||||
bi->ci[i] = tHist.computeIndices[i].comp;
|
||||
bi->ct0[i] = tHist.computeTags[0][i].comp;
|
||||
bi->ct1[i] = tHist.computeTags[1][i].comp;
|
||||
}
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, "
|
||||
"path Hist: %x; pointer:%d\n", branch_pc, taken, tHist.pathHist,
|
||||
tHist.ptGhist);
|
||||
assert(threadHistory[tid].gHist ==
|
||||
&threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::squash(ThreadID tid, bool taken, TAGEBase::BranchInfo *bi,
|
||||
Addr target)
|
||||
{
|
||||
if (!speculativeHistUpdate) {
|
||||
/* If there are no speculative updates, no actions are needed */
|
||||
return;
|
||||
}
|
||||
|
||||
ThreadHistory& tHist = threadHistory[tid];
|
||||
DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
|
||||
"pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
|
||||
tHist.pathHist = bi->pathHist;
|
||||
tHist.ptGhist = bi->ptGhist;
|
||||
tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
|
||||
tHist.gHist[0] = (taken ? 1 : 0);
|
||||
for (int i = 1; i <= nHistoryTables; i++) {
|
||||
tHist.computeIndices[i].comp = bi->ci[i];
|
||||
tHist.computeTags[0][i].comp = bi->ct0[i];
|
||||
tHist.computeTags[1][i].comp = bi->ct1[i];
|
||||
tHist.computeIndices[i].update(tHist.gHist);
|
||||
tHist.computeTags[0][i].update(tHist.gHist);
|
||||
tHist.computeTags[1][i].update(tHist.gHist);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::extraAltCalc(BranchInfo* bi)
|
||||
{
|
||||
// do nothing. This is only used in some derived classes
|
||||
return;
|
||||
}
|
||||
|
||||
int
|
||||
TAGEBase::getRandom()
|
||||
{
|
||||
return random_mt.random<int>();
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::updateStats(bool taken, BranchInfo* bi)
|
||||
{
|
||||
if (taken == bi->tagePred) {
|
||||
// correct prediction
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ONLY: tageBimodalProviderCorrect++; break;
|
||||
case TAGE_LONGEST_MATCH: tageLongestMatchProviderCorrect++; break;
|
||||
case BIMODAL_ALT_MATCH: bimodalAltMatchProviderCorrect++; break;
|
||||
case TAGE_ALT_MATCH: tageAltMatchProviderCorrect++; break;
|
||||
}
|
||||
} else {
|
||||
// wrong prediction
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ONLY: tageBimodalProviderWrong++; break;
|
||||
case TAGE_LONGEST_MATCH:
|
||||
tageLongestMatchProviderWrong++;
|
||||
if (bi->altTaken == taken) {
|
||||
tageAltMatchProviderWouldHaveHit++;
|
||||
}
|
||||
break;
|
||||
case BIMODAL_ALT_MATCH:
|
||||
bimodalAltMatchProviderWrong++;
|
||||
break;
|
||||
case TAGE_ALT_MATCH:
|
||||
tageAltMatchProviderWrong++;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (bi->provider) {
|
||||
case BIMODAL_ALT_MATCH:
|
||||
case TAGE_ALT_MATCH:
|
||||
if (bi->longestMatchPred == taken) {
|
||||
tageLongestMatchProviderWouldHaveHit++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (bi->provider) {
|
||||
case TAGE_LONGEST_MATCH:
|
||||
case TAGE_ALT_MATCH:
|
||||
tageLongestMatchProvider[bi->hitBank]++;
|
||||
tageAltMatchProvider[bi->altBank]++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
TAGEBase::getGHR(ThreadID tid, BranchInfo *bi) const
|
||||
{
|
||||
unsigned val = 0;
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
// Make sure we don't go out of bounds
|
||||
int gh_offset = bi->ptGhist + i;
|
||||
assert(&(threadHistory[tid].globalHistory[gh_offset]) <
|
||||
threadHistory[tid].globalHistory + histBufferSize);
|
||||
val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
void
|
||||
TAGEBase::regStats()
|
||||
{
|
||||
tageLongestMatchProviderCorrect
|
||||
.name(name() + ".tageLongestMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Longest Match is the provider and "
|
||||
"the prediction is correct");
|
||||
|
||||
tageAltMatchProviderCorrect
|
||||
.name(name() + ".tageAltMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Alt Match is the provider and "
|
||||
"the prediction is correct");
|
||||
|
||||
bimodalAltMatchProviderCorrect
|
||||
.name(name() + ".bimodalAltMatchProviderCorrect")
|
||||
.desc("Number of times TAGE Alt Match is the bimodal and it is the "
|
||||
"provider and the prediction is correct");
|
||||
|
||||
tageBimodalProviderCorrect
|
||||
.name(name() + ".tageBimodalProviderCorrect")
|
||||
.desc("Number of times there are no hits on the TAGE tables "
|
||||
"and the bimodal prediction is correct");
|
||||
|
||||
tageLongestMatchProviderWrong
|
||||
.name(name() + ".tageLongestMatchProviderWrong")
|
||||
.desc("Number of times TAGE Longest Match is the provider and "
|
||||
"the prediction is wrong");
|
||||
|
||||
tageAltMatchProviderWrong
|
||||
.name(name() + ".tageAltMatchProviderWrong")
|
||||
.desc("Number of times TAGE Alt Match is the provider and "
|
||||
"the prediction is wrong");
|
||||
|
||||
bimodalAltMatchProviderWrong
|
||||
.name(name() + ".bimodalAltMatchProviderWrong")
|
||||
.desc("Number of times TAGE Alt Match is the bimodal and it is the "
|
||||
"provider and the prediction is wrong");
|
||||
|
||||
tageBimodalProviderWrong
|
||||
.name(name() + ".tageBimodalProviderWrong")
|
||||
.desc("Number of times there are no hits on the TAGE tables "
|
||||
"and the bimodal prediction is wrong");
|
||||
|
||||
tageAltMatchProviderWouldHaveHit
|
||||
.name(name() + ".tageAltMatchProviderWouldHaveHit")
|
||||
.desc("Number of times TAGE Longest Match is the provider, "
|
||||
"the prediction is wrong and Alt Match prediction was correct");
|
||||
|
||||
tageLongestMatchProviderWouldHaveHit
|
||||
.name(name() + ".tageLongestMatchProviderWouldHaveHit")
|
||||
.desc("Number of times TAGE Alt Match is the provider, the "
|
||||
"prediction is wrong and Longest Match prediction was correct");
|
||||
|
||||
tageLongestMatchProvider
|
||||
.init(nHistoryTables + 1)
|
||||
.name(name() + ".tageLongestMatchProvider")
|
||||
.desc("TAGE provider for longest match");
|
||||
|
||||
tageAltMatchProvider
|
||||
.init(nHistoryTables + 1)
|
||||
.name(name() + ".tageAltMatchProvider")
|
||||
.desc("TAGE provider for alt match");
|
||||
}
|
||||
|
||||
int8_t
|
||||
TAGEBase::getCtr(int hitBank, int hitBankIndex) const
|
||||
{
|
||||
return gtable[hitBank][hitBankIndex].ctr;
|
||||
}
|
||||
|
||||
unsigned
|
||||
TAGEBase::getTageCtrBits() const
|
||||
{
|
||||
return tagTableCounterBits;
|
||||
}
|
||||
|
||||
int
|
||||
TAGEBase::getPathHist(ThreadID tid) const
|
||||
{
|
||||
return threadHistory[tid].pathHist;
|
||||
}
|
||||
|
||||
bool
|
||||
TAGEBase::isSpeculativeUpdateEnabled() const
|
||||
{
|
||||
return speculativeHistUpdate;
|
||||
}
|
||||
|
||||
TAGEBase*
|
||||
TAGEBaseParams::create()
|
||||
{
|
||||
return new TAGEBase(this);
|
||||
}
|
||||
500
src/cpu/pred/tage_base.hh
Normal file
500
src/cpu/pred/tage_base.hh
Normal file
@@ -0,0 +1,500 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The University of Wisconsin
|
||||
*
|
||||
* Copyright (c) 2006 INRIA (Institut National de Recherche en
|
||||
* Informatique et en Automatique / French National Research Institute
|
||||
* for Computer Science and Applied Mathematics)
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
|
||||
* from André Seznec's code.
|
||||
*/
|
||||
|
||||
/* @file
|
||||
* Implementation of a TAGE branch predictor. TAGE is a global-history based
|
||||
* branch predictor. It features a PC-indexed bimodal predictor and N
|
||||
* partially tagged tables, indexed with a hash of the PC and the global
|
||||
* branch history. The different lengths of global branch history used to
|
||||
* index the partially tagged tables grow geometrically. A small path history
|
||||
* is also used in the hash.
|
||||
*
|
||||
* All TAGE tables are accessed in parallel, and the one using the longest
|
||||
* history that matches provides the prediction (some exceptions apply).
|
||||
* Entries are allocated in components using a longer history than the
|
||||
* one that predicted when the prediction is incorrect.
|
||||
*/
|
||||
|
||||
#ifndef __CPU_PRED_TAGE_BASE
|
||||
#define __CPU_PRED_TAGE_BASE
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
#include "params/TAGEBase.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
class TAGEBase : public SimObject
|
||||
{
|
||||
public:
|
||||
TAGEBase(const TAGEBaseParams *p);
|
||||
void regStats() override;
|
||||
void init() override;
|
||||
|
||||
protected:
|
||||
// Prediction Structures
|
||||
|
||||
// Tage Entry
|
||||
struct TageEntry
|
||||
{
|
||||
int8_t ctr;
|
||||
uint16_t tag;
|
||||
uint8_t u;
|
||||
TageEntry() : ctr(0), tag(0), u(0) { }
|
||||
};
|
||||
|
||||
// Folded History Table - compressed history
|
||||
// to mix with instruction PC to index partially
|
||||
// tagged tables.
|
||||
struct FoldedHistory
|
||||
{
|
||||
unsigned comp;
|
||||
int compLength;
|
||||
int origLength;
|
||||
int outpoint;
|
||||
int bufferSize;
|
||||
|
||||
FoldedHistory()
|
||||
{
|
||||
comp = 0;
|
||||
}
|
||||
|
||||
void init(int original_length, int compressed_length)
|
||||
{
|
||||
origLength = original_length;
|
||||
compLength = compressed_length;
|
||||
outpoint = original_length % compressed_length;
|
||||
}
|
||||
|
||||
void update(uint8_t * h)
|
||||
{
|
||||
comp = (comp << 1) | h[0];
|
||||
comp ^= h[origLength] << outpoint;
|
||||
comp ^= (comp >> compLength);
|
||||
comp &= (ULL(1) << compLength) - 1;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
// provider type
|
||||
enum {
|
||||
BIMODAL_ONLY = 0,
|
||||
TAGE_LONGEST_MATCH,
|
||||
BIMODAL_ALT_MATCH,
|
||||
TAGE_ALT_MATCH,
|
||||
LAST_TAGE_PROVIDER_TYPE = TAGE_ALT_MATCH
|
||||
};
|
||||
|
||||
// Primary branch history entry
|
||||
struct BranchInfo
|
||||
{
|
||||
int pathHist;
|
||||
int ptGhist;
|
||||
int hitBank;
|
||||
int hitBankIndex;
|
||||
int altBank;
|
||||
int altBankIndex;
|
||||
int bimodalIndex;
|
||||
|
||||
bool tagePred;
|
||||
bool altTaken;
|
||||
bool condBranch;
|
||||
bool longestMatchPred;
|
||||
bool pseudoNewAlloc;
|
||||
Addr branchPC;
|
||||
|
||||
// Pointer to dynamically allocated storage
|
||||
// to save table indices and folded histories.
|
||||
// To do one call to new instead of five.
|
||||
int *storage;
|
||||
|
||||
// Pointers to actual saved array within the dynamically
|
||||
// allocated storage.
|
||||
int *tableIndices;
|
||||
int *tableTags;
|
||||
int *ci;
|
||||
int *ct0;
|
||||
int *ct1;
|
||||
|
||||
// for stats purposes
|
||||
unsigned provider;
|
||||
|
||||
BranchInfo(const TAGEBase &tage)
|
||||
: pathHist(0), ptGhist(0),
|
||||
hitBank(0), hitBankIndex(0),
|
||||
altBank(0), altBankIndex(0),
|
||||
bimodalIndex(0),
|
||||
tagePred(false), altTaken(false),
|
||||
condBranch(false), longestMatchPred(false),
|
||||
pseudoNewAlloc(false), branchPC(0),
|
||||
provider(-1)
|
||||
{
|
||||
int sz = tage.nHistoryTables + 1;
|
||||
storage = new int [sz * 5];
|
||||
tableIndices = storage;
|
||||
tableTags = storage + sz;
|
||||
ci = tableTags + sz;
|
||||
ct0 = ci + sz;
|
||||
ct1 = ct0 + sz;
|
||||
}
|
||||
|
||||
virtual ~BranchInfo()
|
||||
{
|
||||
delete[] storage;
|
||||
}
|
||||
};
|
||||
|
||||
virtual BranchInfo *makeBranchInfo();
|
||||
|
||||
/**
|
||||
* Computes the index used to access the
|
||||
* bimodal table.
|
||||
* @param pc_in The unshifted branch PC.
|
||||
*/
|
||||
virtual int bindex(Addr pc_in) const;
|
||||
|
||||
/**
|
||||
* Computes the index used to access a
|
||||
* partially tagged table.
|
||||
* @param tid The thread ID used to select the
|
||||
* global histories to use.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
virtual int gindex(ThreadID tid, Addr pc, int bank) const;
|
||||
|
||||
/**
|
||||
* Utility function to shuffle the path history
|
||||
* depending on which tagged table we are accessing.
|
||||
* @param phist The path history.
|
||||
* @param size Number of path history bits to use.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
virtual int F(int phist, int size, int bank) const;
|
||||
|
||||
/**
|
||||
* Computes the partial tag of a tagged table.
|
||||
* @param tid the thread ID used to select the
|
||||
* global histories to use.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bank The partially tagged table to access.
|
||||
*/
|
||||
virtual uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
|
||||
|
||||
/**
|
||||
* Updates a direction counter based on the actual
|
||||
* branch outcome.
|
||||
* @param ctr Reference to counter to update.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param nbits Counter width.
|
||||
*/
|
||||
template<typename T>
|
||||
static void ctrUpdate(T & ctr, bool taken, int nbits);
|
||||
|
||||
/**
|
||||
* Updates an unsigned counter based on up/down parameter
|
||||
* @param ctr Reference to counter to update.
|
||||
* @param up Boolean indicating if the counter is incremented/decremented
|
||||
* If true it is incremented, if false it is decremented
|
||||
* @param nbits Counter width.
|
||||
*/
|
||||
static void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits);
|
||||
|
||||
/**
|
||||
* Get a branch prediction from the bimodal
|
||||
* predictor.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param bi Pointer to information on the
|
||||
* prediction.
|
||||
*/
|
||||
virtual bool getBimodePred(Addr pc, BranchInfo* bi) const;
|
||||
|
||||
/**
|
||||
* Updates the bimodal predictor.
|
||||
* @param pc The unshifted branch PC.
|
||||
* @param taken The actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
void baseUpdate(Addr pc, bool taken, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* (Speculatively) updates the global branch history.
|
||||
* @param h Reference to pointer to global branch history.
|
||||
* @param dir (Predicted) outcome to update the histories
|
||||
* with.
|
||||
* @param tab
|
||||
* @param PT Reference to path history.
|
||||
*/
|
||||
void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
|
||||
|
||||
/**
|
||||
* Update TAGE. Called at execute to repair histories on a misprediction
|
||||
* and at commit to update the tables.
|
||||
* @param tid The thread ID to select the global
|
||||
* histories to use.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* (Speculatively) updates global histories (path and direction).
|
||||
* Also recomputes compressed (folded) histories based on the
|
||||
* branch direction.
|
||||
* @param tid The thread ID to select the histories
|
||||
* to update.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken (Predicted) branch direction.
|
||||
* @param b Wrapping pointer to BranchInfo (to allow
|
||||
* storing derived class prediction information in the
|
||||
* base class).
|
||||
*/
|
||||
virtual void updateHistories(
|
||||
ThreadID tid, Addr branch_pc, bool taken, BranchInfo* b,
|
||||
bool speculative,
|
||||
const StaticInstPtr & inst = StaticInst::nullStaticInstPtr,
|
||||
Addr target = MaxAddr);
|
||||
|
||||
/**
|
||||
* Restores speculatively updated path and direction histories.
|
||||
* Also recomputes compressed (folded) histories based on the
|
||||
* correct branch outcome.
|
||||
* This version of squash() is called once on a branch misprediction.
|
||||
* @param tid The Thread ID to select the histories to rollback.
|
||||
* @param taken The correct branch outcome.
|
||||
* @param bp_history Wrapping pointer to BranchInfo (to allow
|
||||
* storing derived class prediction information in the
|
||||
* base class).
|
||||
* @param target The correct branch target
|
||||
* @post bp_history points to valid memory.
|
||||
*/
|
||||
virtual void squash(
|
||||
ThreadID tid, bool taken, BranchInfo *bi, Addr target);
|
||||
|
||||
/**
|
||||
* Update TAGE for conditional branches.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param taken Actual branch outcome.
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
* @nrand Random int number from 0 to 3
|
||||
* @param corrTarget The correct branch target
|
||||
*/
|
||||
virtual void condBranchUpdate(
|
||||
ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi,
|
||||
int nrand, Addr corrTarget);
|
||||
|
||||
/**
|
||||
* TAGE prediction called from TAGE::predict
|
||||
* @param tid The thread ID to select the global
|
||||
* histories to use.
|
||||
* @param branch_pc The unshifted branch PC.
|
||||
* @param cond_branch True if the branch is conditional.
|
||||
* @param bi Pointer to the BranchInfo
|
||||
*/
|
||||
bool tagePredict(
|
||||
ThreadID tid, Addr branch_pc, bool cond_branch, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Update the stats
|
||||
* @param taken Actual branch outcome
|
||||
* @param bi Pointer to information on the prediction
|
||||
* recorded at prediction time.
|
||||
*/
|
||||
virtual void updateStats(bool taken, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Instantiates the TAGE table entries
|
||||
*/
|
||||
virtual void buildTageTables();
|
||||
|
||||
/**
|
||||
* Calculates the history lengths
|
||||
* and some other paramters in derived classes
|
||||
*/
|
||||
virtual void calculateParameters();
|
||||
|
||||
/**
|
||||
* On a prediction, calculates the TAGE indices and tags for
|
||||
* all the different history lengths
|
||||
*/
|
||||
virtual void calculateIndicesAndTags(
|
||||
ThreadID tid, Addr branch_pc, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Calculation of the index for useAltPredForNewlyAllocated
|
||||
* On this base TAGE implementation it is always 0
|
||||
*/
|
||||
virtual unsigned getUseAltIdx(BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Extra calculation to tell whether TAGE allocaitons may happen or not
|
||||
* on an update
|
||||
* For this base TAGE implementation it does nothing
|
||||
*/
|
||||
virtual void adjustAlloc(bool & alloc, bool taken);
|
||||
|
||||
/**
|
||||
* Handles Allocation and U bits reset on an update
|
||||
*/
|
||||
virtual void handleAllocAndUReset(
|
||||
bool alloc, bool taken, BranchInfo* bi, int nrand);
|
||||
|
||||
/**
|
||||
* Handles the U bits reset
|
||||
*/
|
||||
virtual void handleUReset();
|
||||
|
||||
/**
|
||||
* Handles the update of the TAGE entries
|
||||
*/
|
||||
virtual void handleTAGEUpdate(
|
||||
Addr branch_pc, bool taken, BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Algorithm for resetting a single U counter
|
||||
*/
|
||||
virtual void resetUctr(uint8_t & u);
|
||||
|
||||
/**
|
||||
* Extra steps for calculating altTaken
|
||||
* For this base TAGE class it does nothing
|
||||
*/
|
||||
virtual void extraAltCalc(BranchInfo* bi);
|
||||
|
||||
/**
|
||||
* Algorithm for returning a random number
|
||||
* This base TAGE class just uses random_mt, but some derived classes
|
||||
* may want to use a more realistic implementation or force some values
|
||||
*/
|
||||
static int getRandom();
|
||||
|
||||
void btbUpdate(ThreadID tid, Addr branch_addr, BranchInfo* &bi);
|
||||
unsigned getGHR(ThreadID tid, BranchInfo *bi) const;
|
||||
int8_t getCtr(int hitBank, int hitBankIndex) const;
|
||||
unsigned getTageCtrBits() const;
|
||||
int getPathHist(ThreadID tid) const;
|
||||
bool isSpeculativeUpdateEnabled() const;
|
||||
|
||||
protected:
|
||||
const unsigned logRatioBiModalHystEntries;
|
||||
const unsigned nHistoryTables;
|
||||
const unsigned tagTableCounterBits;
|
||||
const unsigned tagTableUBits;
|
||||
const unsigned histBufferSize;
|
||||
const unsigned minHist;
|
||||
const unsigned maxHist;
|
||||
const unsigned pathHistBits;
|
||||
|
||||
std::vector<unsigned> tagTableTagWidths;
|
||||
std::vector<int> logTagTableSizes;
|
||||
|
||||
std::vector<bool> btablePrediction;
|
||||
std::vector<bool> btableHysteresis;
|
||||
TageEntry **gtable;
|
||||
|
||||
// Keep per-thread histories to
|
||||
// support SMT.
|
||||
struct ThreadHistory {
|
||||
// Speculative path history
|
||||
// (LSB of branch address)
|
||||
int pathHist;
|
||||
|
||||
// Speculative branch direction
|
||||
// history (circular buffer)
|
||||
// @TODO Convert to std::vector<bool>
|
||||
uint8_t *globalHistory;
|
||||
|
||||
// Pointer to most recent branch outcome
|
||||
uint8_t* gHist;
|
||||
|
||||
// Index to most recent branch outcome
|
||||
int ptGhist;
|
||||
|
||||
// Speculative folded histories.
|
||||
FoldedHistory *computeIndices;
|
||||
FoldedHistory *computeTags[2];
|
||||
};
|
||||
|
||||
std::vector<ThreadHistory> threadHistory;
|
||||
|
||||
/**
|
||||
* Initialization of the folded histories
|
||||
*/
|
||||
virtual void initFoldedHistories(ThreadHistory & history);
|
||||
|
||||
int *histLengths;
|
||||
int *tableIndices;
|
||||
int *tableTags;
|
||||
|
||||
std::vector<int8_t> useAltPredForNewlyAllocated;
|
||||
int64_t tCounter;
|
||||
uint64_t logUResetPeriod;
|
||||
unsigned numUseAltOnNa;
|
||||
unsigned useAltOnNaBits;
|
||||
unsigned maxNumAlloc;
|
||||
|
||||
// Tells which tables are active
|
||||
// (for the base TAGE implementation all are active)
|
||||
// Some other classes use this for handling associativity
|
||||
std::vector<bool> noSkip;
|
||||
|
||||
const bool speculativeHistUpdate;
|
||||
|
||||
const unsigned instShiftAmt;
|
||||
|
||||
// stats
|
||||
Stats::Scalar tageLongestMatchProviderCorrect;
|
||||
Stats::Scalar tageAltMatchProviderCorrect;
|
||||
Stats::Scalar bimodalAltMatchProviderCorrect;
|
||||
Stats::Scalar tageBimodalProviderCorrect;
|
||||
Stats::Scalar tageLongestMatchProviderWrong;
|
||||
Stats::Scalar tageAltMatchProviderWrong;
|
||||
Stats::Scalar bimodalAltMatchProviderWrong;
|
||||
Stats::Scalar tageBimodalProviderWrong;
|
||||
Stats::Scalar tageAltMatchProviderWouldHaveHit;
|
||||
Stats::Scalar tageLongestMatchProviderWouldHaveHit;
|
||||
|
||||
Stats::Vector tageLongestMatchProvider;
|
||||
Stats::Vector tageAltMatchProvider;
|
||||
};
|
||||
|
||||
#endif // __CPU_PRED_TAGE_BASE
|
||||
@@ -266,7 +266,8 @@ TournamentBP::uncondBranch(ThreadID tid, Addr pc, void * &bp_history)
|
||||
|
||||
void
|
||||
TournamentBP::update(ThreadID tid, Addr branch_addr, bool taken,
|
||||
void *bp_history, bool squashed)
|
||||
void *bp_history, bool squashed,
|
||||
const StaticInstPtr & inst, Addr corrTarget)
|
||||
{
|
||||
assert(bp_history);
|
||||
|
||||
|
||||
@@ -101,9 +101,12 @@ class TournamentBP : public BPredUnit
|
||||
* when the branch was predicted.
|
||||
* @param squashed is set when this function is called during a squash
|
||||
* operation.
|
||||
* @param inst Static instruction information
|
||||
* @param corrTarget Resolved target of the branch (only needed if
|
||||
* squashed)
|
||||
*/
|
||||
void update(ThreadID tid, Addr branch_addr, bool taken, void *bp_history,
|
||||
bool squashed);
|
||||
bool squashed, const StaticInstPtr & inst, Addr corrTarget);
|
||||
|
||||
/**
|
||||
* Restores the global branch history on a squash.
|
||||
|
||||
Reference in New Issue
Block a user