Various branch predictor fixes/cleanup. It works more correctly now and supports both local and tournament predictors.

cpu/o3/2bit_local_pred.cc:
    Branch predictor cleanup/fixup.  Rename this to LocalBP.
cpu/o3/2bit_local_pred.hh:
    Rename to LocalBP, update to support changes to BPredUnit, include comments.
cpu/o3/alpha_cpu_builder.cc:
    Support extra parameters to the branch predictor.  Now it takes in a parameter to tell it which branch predictor it is using, the local or the tournament predictor.
cpu/o3/alpha_params.hh:
    Add in extra parameter for the branch predictor type.
cpu/o3/bpred_unit.cc:
    Branch predictor fixup/cleanup.  Rename it to BPredUnit.
cpu/o3/bpred_unit.hh:
    Branch predictor fixup/cleanup.  Now supports both the local and tournament predictors, and stores the branch predictor update state.
cpu/o3/bpred_unit_impl.hh:
    Branch predictor overhaul.  Now supports both the local and tournament predictors.
cpu/o3/cpu_policy.hh:
cpu/ozone/ozone_impl.hh:
cpu/ozone/simple_impl.hh:
    Reflect the class name change.
cpu/o3/decode_impl.hh:
    Be sure to set the predicted target as well so we don't squash twice.
cpu/o3/tournament_pred.cc:
cpu/o3/tournament_pred.hh:
    Fixes to the tournament predictor.
cpu/ozone/simple_params.hh:
    Include parameter for the branch predictor type.
python/m5/objects/AlphaFullCPU.py:
python/m5/objects/OzoneCPU.py:
    Include the parameter for the branch predictor type.

--HG--
extra : convert_revision : 34afebb3b40b47accb12558e439ee4cb03df5e64
This commit is contained in:
Kevin Lim
2006-05-25 17:01:48 -04:00
parent 02aa549c9b
commit 248bd2bb62
16 changed files with 461 additions and 195 deletions

View File

@@ -30,9 +30,9 @@
#include "base/trace.hh"
#include "cpu/o3/2bit_local_pred.hh"
DefaultBP::DefaultBP(unsigned _localPredictorSize,
unsigned _localCtrBits,
unsigned _instShiftAmt)
LocalBP::LocalBP(unsigned _localPredictorSize,
unsigned _localCtrBits,
unsigned _instShiftAmt)
: localPredictorSize(_localPredictorSize),
localCtrBits(_localCtrBits),
instShiftAmt(_instShiftAmt)
@@ -68,7 +68,7 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
}
void
DefaultBP::reset()
LocalBP::reset()
{
for (int i = 0; i < localPredictorSets; ++i) {
localCtrs[i].reset();
@@ -76,21 +76,21 @@ DefaultBP::reset()
}
bool
DefaultBP::lookup(Addr &branch_addr)
LocalBP::lookup(Addr &branch_addr, void * &bp_history)
{
bool taken;
uint8_t local_prediction;
uint8_t counter_val;
unsigned local_predictor_idx = getLocalIndex(branch_addr);
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
local_predictor_idx);
local_prediction = localCtrs[local_predictor_idx].read();
counter_val = localCtrs[local_predictor_idx].read();
DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
(int)local_prediction);
(int)counter_val);
taken = getPrediction(local_prediction);
taken = getPrediction(counter_val);
#if 0
// Speculative update.
@@ -107,8 +107,9 @@ DefaultBP::lookup(Addr &branch_addr)
}
void
DefaultBP::update(Addr &branch_addr, bool taken)
LocalBP::update(Addr &branch_addr, bool taken, void *bp_history)
{
assert(bp_history == NULL);
unsigned local_predictor_idx;
// Update the local predictor.
@@ -128,7 +129,7 @@ DefaultBP::update(Addr &branch_addr, bool taken)
inline
bool
DefaultBP::getPrediction(uint8_t &count)
LocalBP::getPrediction(uint8_t &count)
{
// Get the MSB of the count
return (count >> (localCtrBits - 1));
@@ -136,7 +137,7 @@ DefaultBP::getPrediction(uint8_t &count)
inline
unsigned
DefaultBP::getLocalIndex(Addr &branch_addr)
LocalBP::getLocalIndex(Addr &branch_addr)
{
return (branch_addr >> instShiftAmt) & indexMask;
}

View File

@@ -35,7 +35,14 @@
#include <vector>
class DefaultBP
/**
* Implements a local predictor that uses the PC to index into a table of
* counters. Note that whenever a bp_history pointer is passed to this
* predictor it should be NULL, because the predictor has no branch
* predictor state that needs to be recorded or updated; the update can be
* determined solely by whether the branch was taken or not taken.
*/
class LocalBP
{
public:
/**
@@ -44,28 +51,31 @@ class DefaultBP
* @param localCtrBits Number of bits per counter.
* @param instShiftAmt Offset amount for instructions to ignore alignment.
*/
DefaultBP(unsigned localPredictorSize, unsigned localCtrBits,
unsigned instShiftAmt);
LocalBP(unsigned localPredictorSize, unsigned localCtrBits,
unsigned instShiftAmt);
/**
* Looks up the given address in the branch predictor and returns
* a true/false value as to whether it is taken.
* @param branch_addr The address of the branch to look up.
* @param bp_history Pointer to any bp history state.
* @return Whether or not the branch is taken.
*/
bool lookup(Addr &branch_addr);
bool lookup(Addr &branch_addr, void * &bp_history);
/**
* Updates the branch predictor with the actual result of a branch.
* @param branch_addr The address of the branch to update.
* @param taken Whether or not the branch was taken.
* @param bp_history Pointer to any bp history state; NULL for this
* predictor.
*/
void update(Addr &branch_addr, bool taken);
void update(Addr &branch_addr, bool taken, void *bp_history);
/**
 * Squash hook for a branch whose prediction is being discarded.
 * This predictor hands out no history object, so the only work done
 * here is to check that the pointer passed back is NULL.
 * @param bp_history Pointer to any bp history state; expected to be NULL.
 */
void squash(void *bp_history)
{
    assert(bp_history == NULL);
}
void reset();
private:
/**
* Returns the taken/not taken prediction given the value of the
* counter.

View File

@@ -109,6 +109,7 @@ Param<unsigned> squashWidth;
Param<Tick> trapLatency;
Param<Tick> fetchTrapLatency;
Param<std::string> predType;
Param<unsigned> localPredictorSize;
Param<unsigned> localCtrBits;
Param<unsigned> localHistoryTableSize;
@@ -234,6 +235,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
INIT_PARAM(localPredictorSize, "Size of local predictor"),
INIT_PARAM(localCtrBits, "Bits per counter"),
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
@@ -366,6 +368,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
params->trapLatency = trapLatency;
params->fetchTrapLatency = fetchTrapLatency;
params->predType = predType;
params->localPredictorSize = localPredictorSize;
params->localCtrBits = localCtrBits;
params->localHistoryTableSize = localHistoryTableSize;

View File

@@ -127,6 +127,7 @@ class AlphaSimpleParams : public BaseFullCPU::Params
//
// Branch predictor (BP & BTB)
//
std::string predType;
unsigned localPredictorSize;
unsigned localCtrBits;
unsigned localHistoryTableSize;

View File

@@ -32,6 +32,6 @@
#include "cpu/ozone/ozone_impl.hh"
#include "cpu/ozone/simple_impl.hh"
template class TwobitBPredUnit<AlphaSimpleImpl>;
template class TwobitBPredUnit<OzoneImpl>;
template class TwobitBPredUnit<SimpleImpl>;
template class BPredUnit<AlphaSimpleImpl>;
template class BPredUnit<OzoneImpl>;
template class BPredUnit<SimpleImpl>;

View File

@@ -46,16 +46,25 @@
* and the BTB.
*/
template<class Impl>
class TwobitBPredUnit
class BPredUnit
{
public:
private:
typedef typename Impl::Params Params;
typedef typename Impl::DynInstPtr DynInstPtr;
enum PredType {
Local,
Tournament
};
PredType predictor;
public:
/**
* @param params The params object, that has the size of the BP and BTB.
*/
TwobitBPredUnit(Params *params);
BPredUnit(Params *params);
/**
* Registers statistics.
@@ -76,6 +85,9 @@ class TwobitBPredUnit
*/
bool predict(DynInstPtr &inst, Addr &PC, unsigned tid);
// @todo: Rename this function.
void BPUncond(void * &bp_history);
/**
* Tells the branch predictor to commit any updates until the given
* sequence number.
@@ -104,13 +116,20 @@ class TwobitBPredUnit
void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
bool actually_taken, unsigned tid);
/**
* @param bp_history Pointer to the history object. The predictor
* will need to update any state and delete the object.
*/
void BPSquash(void *bp_history);
/**
* Looks up a given PC in the BP to see if it is taken or not taken.
* @param inst_PC The PC to look up.
* @param bp_history Pointer that will be set to an object that
* has the branch predictor state associated with the lookup.
* @return Whether the branch is taken or not taken.
*/
bool BPLookup(Addr &inst_PC)
{ return BP.lookup(inst_PC); }
bool BPLookup(Addr &inst_PC, void * &bp_history);
/**
* Looks up a given PC in the BTB to see if a matching entry exists.
@@ -132,10 +151,11 @@ class TwobitBPredUnit
* Updates the BP with taken/not taken information.
* @param inst_PC The branch's PC that will be updated.
* @param taken Whether the branch was taken or not taken.
* @param bp_history Pointer to the branch predictor state that is
* associated with the branch lookup that is being updated.
* @todo Make this update flexible enough to handle a global predictor.
*/
void BPUpdate(Addr &inst_PC, bool taken)
{ BP.update(inst_PC, taken); }
void BPUpdate(Addr &inst_PC, bool taken, void *bp_history);
/**
* Updates the BTB with the target of a branch.
@@ -145,18 +165,20 @@ class TwobitBPredUnit
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
{ BTB.update(inst_PC, target_PC,0); }
void dump();
private:
struct PredictorHistory {
/**
* Makes a predictor history struct that contains a sequence number,
* the PC of its instruction, and whether or not it was predicted
* taken.
* Makes a predictor history struct that contains any
* information needed to update the predictor, BTB, and RAS.
*/
PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
const bool pred_taken, const unsigned _tid)
: seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0),
const bool pred_taken, void *bp_history,
const unsigned _tid)
: seqNum(seq_num), PC(inst_PC), RASTarget(0),
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
wasCall(0)
wasCall(0), bpHistory(bp_history)
{ }
/** The sequence number for the predictor history entry. */
@@ -168,9 +190,6 @@ class TwobitBPredUnit
/** The RAS target (only valid if a return). */
Addr RASTarget;
/** The global history at the time this entry was created. */
unsigned globalHistory;
/** The RAS index of the instruction (only valid if a call). */
unsigned RASIndex;
@@ -185,6 +204,12 @@ class TwobitBPredUnit
/** Whether or not the instruction was a call. */
bool wasCall;
/** Pointer to the history object passed back from the branch
* predictor. It is used to update or restore state of the
* branch predictor.
*/
void *bpHistory;
};
typedef std::list<PredictorHistory> History;
@@ -196,8 +221,11 @@ class TwobitBPredUnit
*/
History predHist[Impl::MaxThreads];
/** The branch predictor. */
DefaultBP BP;
/** The local branch predictor. */
LocalBP *localBP;
/** The tournament branch predictor. */
TournamentBP *tournamentBP;
/** The BTB. */
DefaultBTB BTB;

View File

@@ -36,21 +36,40 @@
using namespace std;
template<class Impl>
TwobitBPredUnit<Impl>::TwobitBPredUnit(Params *params)
: BP(params->localPredictorSize,
params->localCtrBits,
params->instShiftAmt),
BTB(params->BTBEntries,
BPredUnit<Impl>::BPredUnit(Params *params)
: BTB(params->BTBEntries,
params->BTBTagSize,
params->instShiftAmt)
{
// Setup the selected predictor.
if (params->predType == "local") {
localBP = new LocalBP(params->localPredictorSize,
params->localCtrBits,
params->instShiftAmt);
predictor = Local;
} else if (params->predType == "tournament") {
tournamentBP = new TournamentBP(params->localPredictorSize,
params->localCtrBits,
params->localHistoryTableSize,
params->localHistoryBits,
params->globalPredictorSize,
params->globalHistoryBits,
params->globalCtrBits,
params->choicePredictorSize,
params->choiceCtrBits,
params->instShiftAmt);
predictor = Tournament;
} else {
fatal("Invalid BP selected!");
}
for (int i=0; i < Impl::MaxThreads; i++)
RAS[i].init(params->RASSize);
}
template <class Impl>
void
TwobitBPredUnit<Impl>::regStats()
BPredUnit<Impl>::regStats()
{
lookups
.name(name() + ".BPredUnit.lookups")
@@ -96,17 +115,20 @@ TwobitBPredUnit<Impl>::regStats()
template <class Impl>
void
TwobitBPredUnit<Impl>::switchOut()
BPredUnit<Impl>::switchOut()
{
// Clear any state upon switch out.
for (int i = 0; i < Impl::MaxThreads; ++i) {
predHist[i].clear();
squash(0, i);
}
}
template <class Impl>
void
TwobitBPredUnit<Impl>::takeOverFrom()
BPredUnit<Impl>::takeOverFrom()
{
// Can reset all predictor state, but it's not necessarily better
// than leaving it be.
/*
for (int i = 0; i < Impl::MaxThreads; ++i)
RAS[i].reset();
@@ -118,11 +140,10 @@ TwobitBPredUnit<Impl>::takeOverFrom()
template <class Impl>
bool
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
{
// See if branch predictor predicts taken.
// If so, get its target addr either from the BTB or the RAS.
// Once that's done, speculatively update the predictor?
// Save off record of branch stuff so the RAS can be fixed
// up once it's done.
@@ -133,20 +154,25 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
++lookups;
void *bp_history = NULL;
if (inst->isUncondCtrl()) {
DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
pred_taken = true;
// Tell the BP there was an unconditional branch.
BPUncond(bp_history);
} else {
++condPredicted;
pred_taken = BPLookup(PC);
pred_taken = BPLookup(PC, bp_history);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i "
"for PC %#x\n",
tid, pred_taken, inst->readPC());
}
PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid);
PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
bp_history, tid);
// Now lookup in the BTB or RAS.
if (pred_taken) {
@@ -187,7 +213,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
if (BTB.valid(PC, tid)) {
++BTBHits;
//If it's anything else, use the BTB to get the target addr.
// If it's not a return, use the BTB to get the target addr.
target = BTB.lookup(PC, tid);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted"
@@ -221,7 +247,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
template <class Impl>
void
TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
{
DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
"number %lli.\n", tid, done_sn);
@@ -229,8 +255,9 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
while (!predHist[tid].empty() &&
predHist[tid].back().seqNum <= done_sn) {
// Update the branch predictor with the correct results.
BP.update(predHist[tid].back().PC,
predHist[tid].back().predTaken);
BPUpdate(predHist[tid].back().PC,
predHist[tid].back().predTaken,
predHist[tid].back().bpHistory);
predHist[tid].pop_back();
}
@@ -238,13 +265,13 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
template <class Impl>
void
TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
{
History &pred_hist = predHist[tid];
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
if (pred_hist.front().usedRAS) {
if (pred_hist.front().usedRAS) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
" target: %#x.\n",
tid,
@@ -255,12 +282,15 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
pred_hist.front().RASTarget);
} else if (pred_hist.front().wasCall) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added "
"to the RAS.\n",tid);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry "
"added to the RAS.\n",tid);
RAS[tid].pop();
}
// This call should delete the bpHistory.
BPSquash(pred_hist.front().bpHistory);
pred_hist.pop_front();
}
@@ -268,10 +298,10 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
template <class Impl>
void
TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
const Addr &corr_target,
const bool actually_taken,
unsigned tid)
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
const Addr &corr_target,
const bool actually_taken,
unsigned tid)
{
// Now that we know that a branch was mispredicted, we need to undo
// all the branches that have been seen up until this branch and
@@ -285,40 +315,96 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
"setting target to %#x.\n",
tid, squashed_sn, corr_target);
while (!pred_hist.empty() &&
pred_hist.front().seqNum > squashed_sn) {
if (pred_hist.front().usedRAS) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, "
"target: %#x.\n",
tid,
pred_hist.front().RASIndex,
pred_hist.front().RASTarget);
RAS[tid].restore(pred_hist.front().RASIndex,
pred_hist.front().RASTarget);
} else if (pred_hist.front().wasCall) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry"
" added to the RAS.\n", tid);
RAS[tid].pop();
}
pred_hist.pop_front();
}
squash(squashed_sn, tid);
// If there's a squash due to a syscall, there may not be an entry
// corresponding to the squash. In that case, don't bother trying to
// fix up the entry.
if (!pred_hist.empty()) {
pred_hist.front().predTaken = actually_taken;
assert(pred_hist.front().seqNum == squashed_sn);
if (pred_hist.front().usedRAS) {
++RASIncorrect;
}
BP.update(pred_hist.front().PC, actually_taken);
BPUpdate(pred_hist.front().PC, actually_taken,
pred_hist.front().bpHistory);
BTB.update(pred_hist.front().PC, corr_target, tid);
pred_hist.pop_front();
}
}
/**
 * Notifies the selected predictor that an unconditional branch was seen.
 * @param bp_history Pointer reference forwarded to the tournament
 * predictor's uncondBr(); left untouched for any other predictor.
 */
template <class Impl>
void
BPredUnit<Impl>::BPUncond(void * &bp_history)
{
    // Unconditional branches only matter to the tournament predictor;
    // for anything else there is nothing to do.
    if (predictor != Tournament) {
        return;
    }

    tournamentBP->uncondBr(bp_history);
}
/**
 * Forwards a squash to whichever predictor is currently selected.
 * @param bp_history Pointer to the history object that was recorded when
 * the squashed branch was predicted.
 */
template <class Impl>
void
BPredUnit<Impl>::BPSquash(void *bp_history)
{
    // Dispatch on the predictor type chosen at construction time.
    switch (predictor) {
      case Local:
        localBP->squash(bp_history);
        break;

      case Tournament:
        tournamentBP->squash(bp_history);
        break;

      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Looks up a PC in whichever predictor is currently selected.
 * @param inst_PC The PC to look up.
 * @param bp_history Pointer reference handed to the predictor's lookup()
 * so it can pass back any state associated with this lookup.
 * @return The taken/not-taken prediction returned by the predictor.
 */
template <class Impl>
bool
BPredUnit<Impl>::BPLookup(Addr &inst_PC, void * &bp_history)
{
    // Dispatch on the predictor type chosen at construction time.
    switch (predictor) {
      case Local:
        return localBP->lookup(inst_PC, bp_history);

      case Tournament:
        return tournamentBP->lookup(inst_PC, bp_history);

      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Passes a branch outcome to whichever predictor is currently selected.
 * @param inst_PC The PC of the branch being updated.
 * @param taken Whether the branch was taken.
 * @param bp_history Pointer to the history object that was recorded when
 * the branch was predicted.
 */
template <class Impl>
void
BPredUnit<Impl>::BPUpdate(Addr &inst_PC, bool taken, void *bp_history)
{
    if (predictor == Local) {
        localBP->update(inst_PC, taken, bp_history);
        return;
    }

    if (predictor == Tournament) {
        tournamentBP->update(inst_PC, taken, bp_history);
        return;
    }

    // Neither known predictor type matched.
    panic("Predictor type is unexpected value!");
}
/**
 * Prints the outstanding predictor history of every thread.
 * For each thread with a non-empty history list this prints the list
 * size, then one line per entry, then a blank line.
 */
template <class Impl>
void
BPredUnit<Impl>::dump()
{
    for (int tid = 0; tid < Impl::MaxThreads; ++tid) {
        // Threads with no recorded predictions produce no output.
        if (predHist[tid].empty())
            continue;

        cprintf("predHist[%i].size(): %i\n", tid, predHist[tid].size());

        // Walk the list from begin() to end(), one line per entry.
        for (typename History::iterator entry = predHist[tid].begin();
             entry != predHist[tid].end(); ++entry) {
            cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, "
                    "bpHistory:%#x\n",
                    entry->seqNum, entry->PC,
                    entry->tid, entry->predTaken,
                    entry->bpHistory);
        }

        cprintf("\n");
    }
}

View File

@@ -51,7 +51,7 @@
template<class Impl>
struct SimpleCPUPolicy
{
typedef TwobitBPredUnit<Impl> BPredUnit;
typedef BPredUnit<Impl> BPredUnit;
typedef PhysRegFile<Impl> RegFile;
typedef SimpleFreeList FreeList;
typedef SimpleRenameMap RenameMap;

View File

@@ -721,6 +721,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
inst->setPredTarg(inst->branchTarget());
break;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,6 +26,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "base/intmath.hh"
#include "cpu/o3/tournament_pred.hh"
TournamentBP::TournamentBP(unsigned _localPredictorSize,
@@ -49,7 +50,9 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
choiceCtrBits(_choiceCtrBits),
instShiftAmt(_instShiftAmt)
{
//Should do checks here to make sure sizes are correct (powers of 2)
if (!isPowerOf2(localPredictorSize)) {
fatal("Invalid local predictor size!\n");
}
//Setup the array of counters for the local predictor
localCtrs.resize(localPredictorSize);
@@ -57,6 +60,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
if (!isPowerOf2(localHistoryTableSize)) {
fatal("Invalid local history table size!\n");
}
//Setup the history table for the local table
localHistoryTable.resize(localHistoryTableSize);
@@ -66,6 +73,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
// Setup the local history mask
localHistoryMask = (1 << localHistoryBits) - 1;
if (!isPowerOf2(globalPredictorSize)) {
fatal("Invalid global predictor size!\n");
}
//Setup the array of counters for the global predictor
globalCtrs.resize(globalPredictorSize);
@@ -77,12 +88,17 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
// Setup the global history mask
globalHistoryMask = (1 << globalHistoryBits) - 1;
if (!isPowerOf2(choicePredictorSize)) {
fatal("Invalid choice predictor size!\n");
}
//Setup the array of counters for the choice predictor
choiceCtrs.resize(choicePredictorSize);
for (int i = 0; i < choicePredictorSize; ++i)
choiceCtrs[i].setBits(choiceCtrBits);
// @todo: Allow for different thresholds between the predictors.
threshold = (1 << (localCtrBits - 1)) - 1;
threshold = threshold / 2;
}
@@ -91,165 +107,185 @@ inline
unsigned
TournamentBP::calcLocHistIdx(Addr &branch_addr)
{
// Get low order bits after removing instruction offset.
return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
}
inline
void
TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
TournamentBP::updateGlobalHistTaken()
{
globalHistory = (globalHistory << 1) | 1;
globalHistory = globalHistory & globalHistoryMask;
}
/**
 * Records a not-taken outcome in the global history register: the
 * history is shifted left by one (shifting in a 0) and then clipped with
 * globalHistoryMask.
 */
inline
void
TournamentBP::updateGlobalHistNotTaken()
{
    globalHistory = (globalHistory << 1) & globalHistoryMask;
}
/**
 * Records a taken outcome in one local history table entry: the entry is
 * shifted left by one and its lowest bit is set. No mask is applied here.
 * @param local_history_idx Index of the local history table entry to
 * update.
 */
inline
void
TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
{
    localHistoryTable[local_history_idx] <<= 1;
    localHistoryTable[local_history_idx] |= 1;
}
inline
void
TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
{
globalHistory = (globalHistory << 1);
globalHistory = globalHistory & globalHistoryMask;
localHistoryTable[local_history_idx] =
(localHistoryTable[local_history_idx] << 1);
}
bool
TournamentBP::lookup(Addr &branch_addr)
TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
{
uint8_t local_prediction;
bool local_prediction;
unsigned local_history_idx;
unsigned local_predictor_idx;
uint8_t global_prediction;
uint8_t choice_prediction;
bool global_prediction;
bool choice_prediction;
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
local_prediction = localCtrs[local_predictor_idx].read();
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
//Lookup in the global predictor to get its branch prediction
global_prediction = globalCtrs[globalHistory].read();
global_prediction = globalCtrs[globalHistory].read() > threshold;
//Lookup in the choice predictor to see which one to use
choice_prediction = choiceCtrs[globalHistory].read();
choice_prediction = choiceCtrs[globalHistory].read() > threshold;
//@todo Put a threshold value in for the three predictors that can
// be set through the constructor (so this isn't hard coded).
//Also should put some of this code into functions.
if (choice_prediction > threshold) {
if (global_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
// Create BPHistory and pass it back to be recorded.
BPHistory *history = new BPHistory;
history->globalHistory = globalHistory;
history->localPredTaken = local_prediction;
history->globalPredTaken = global_prediction;
history->globalUsed = choice_prediction;
bp_history = (void *)history;
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
globalCtrs[globalHistory].increment();
localCtrs[local_history_idx].increment();
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
// Commented code is for doing speculative update of counters and
// all histories.
if (choice_prediction) {
if (global_prediction) {
// updateHistoriesTaken(local_history_idx);
// globalCtrs[globalHistory].increment();
// localCtrs[local_history_idx].increment();
updateGlobalHistTaken();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
globalCtrs[globalHistory].decrement();
localCtrs[local_history_idx].decrement();
// updateHistoriesNotTaken(local_history_idx);
// globalCtrs[globalHistory].decrement();
// localCtrs[local_history_idx].decrement();
updateGlobalHistNotTaken();
return false;
}
} else {
if (local_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
globalCtrs[globalHistory].increment();
localCtrs[local_history_idx].increment();
if (local_prediction) {
// updateHistoriesTaken(local_history_idx);
// globalCtrs[globalHistory].increment();
// localCtrs[local_history_idx].increment();
updateGlobalHistTaken();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
globalCtrs[globalHistory].decrement();
localCtrs[local_history_idx].decrement();
// updateHistoriesNotTaken(local_history_idx);
// globalCtrs[globalHistory].decrement();
// localCtrs[local_history_idx].decrement();
updateGlobalHistNotTaken();
return false;
}
}
}
// Update the branch predictor if it predicted a branch wrong.
void
TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
TournamentBP::uncondBr(void * &bp_history)
{
// Create BPHistory and pass it back to be recorded.
BPHistory *history = new BPHistory;
history->globalHistory = globalHistory;
history->localPredTaken = true;
history->globalPredTaken = true;
bp_history = static_cast<void *>(history);
uint8_t local_prediction;
updateGlobalHistTaken();
}
void
TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
{
unsigned local_history_idx;
unsigned local_predictor_idx;
bool local_pred_taken;
unsigned local_predictor_hist;
uint8_t global_prediction;
bool global_pred_taken;
// Load the correct global history into the register.
globalHistory = correct_gh;
// Get the local predictor's current prediction, remove the incorrect
// update, and update the local predictor
// Get the local predictor's current prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = localHistoryTable[local_history_idx];
local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
local_predictor_hist = localHistoryTable[local_history_idx];
local_predictor_idx = local_predictor_hist & localHistoryMask;
local_prediction = localCtrs[local_predictor_idx].read();
local_pred_taken = local_prediction > threshold;
//Get the global predictor's current prediction, and update the
//global predictor
global_prediction = globalCtrs[globalHistory].read();
global_pred_taken = global_prediction > threshold;
//Update the choice predictor to tell it which one was correct
if (local_pred_taken != global_pred_taken) {
//If the local prediction matches the actual outcome, decerement
//the counter. Otherwise increment the counter.
if (local_pred_taken == taken) {
choiceCtrs[globalHistory].decrement();
} else {
choiceCtrs[globalHistory].increment();
// Update the choice predictor to tell it which one was correct if
// there was a prediction.
if (bp_history) {
BPHistory *history = static_cast<BPHistory *>(bp_history);
if (history->localPredTaken != history->globalPredTaken) {
// If the local prediction matches the actual outcome,
// decrement the counter. Otherwise increment the
// counter.
if (history->localPredTaken == taken) {
choiceCtrs[globalHistory].decrement();
} else if (history->globalPredTaken == taken){
choiceCtrs[globalHistory].increment();
}
}
// We're done with this history, now delete it.
delete history;
}
if (taken) {
assert(globalHistory < globalPredictorSize &&
local_predictor_idx < localPredictorSize);
assert(globalHistory < globalPredictorSize &&
local_predictor_idx < localPredictorSize);
// Update the counters and local history with the proper
// resolution of the branch. Global history is updated
// speculatively and restored upon squash() calls, so it does not
// need to be updated.
if (taken) {
localCtrs[local_predictor_idx].increment();
globalCtrs[globalHistory].increment();
globalHistory = (globalHistory << 1) | 1;
globalHistory = globalHistory & globalHistoryMask;
localHistoryTable[local_history_idx] |= 1;
updateLocalHistTaken(local_history_idx);
} else {
assert(globalHistory < globalPredictorSize &&
local_predictor_idx < localPredictorSize);
localCtrs[local_predictor_idx].decrement();
globalCtrs[globalHistory].decrement();
globalHistory = (globalHistory << 1);
globalHistory = globalHistory & globalHistoryMask;
localHistoryTable[local_history_idx] &= ~1;
updateLocalHistNotTaken(local_history_idx);
}
}
void
TournamentBP::squash(void *bp_history)
{
BPHistory *history = static_cast<BPHistory *>(bp_history);
// Restore global history to state prior to this branch.
globalHistory = history->globalHistory;
// Delete this BPHistory now that we're done with it.
delete history;
}
#ifdef DEBUG
int
TournamentBP::BPHistory::newCount = 0;
#endif

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,6 +34,15 @@
#include "cpu/o3/sat_counter.hh"
#include <vector>
/**
* Implements a tournament branch predictor, hopefully identical to the one
* used in the 21264. It has a local predictor, which uses a local history
* table to index into a table of counters, and a global predictor, which
* uses a global history to index into a table of counters. A choice
* predictor chooses between the two. Only the global history register
* is speculatively updated; the rest are updated when branches commit
* or are found to have been mispredicted.
*/
class TournamentBP
{
public:
@@ -53,30 +62,95 @@ class TournamentBP
/**
* Looks up the given address in the branch predictor and returns
* a true/false value as to whether it is taken.
* a true/false value as to whether it is taken. Also creates a
* BPHistory object to store any state it will need on squash/update.
* @param branch_addr The address of the branch to look up.
* @param bp_history Pointer that will be set to the BPHistory object.
* @return Whether or not the branch is taken.
*/
bool lookup(Addr &branch_addr);
bool lookup(Addr &branch_addr, void * &bp_history);
/**
* Records that there was an unconditional branch, and modifies
* the bp history to point to an object that has the previous
* global history stored in it.
* @param bp_history Pointer that will be set to the BPHistory object.
*/
void uncondBr(void * &bp_history);
/**
* Updates the branch predictor with the actual result of a branch.
* @param branch_addr The address of the branch to update.
* @param taken Whether or not the branch was taken.
* @param bp_history Pointer to the BPHistory object that was created
* when the branch was predicted.
*/
void update(Addr &branch_addr, unsigned global_history, bool taken);
void update(Addr &branch_addr, bool taken, void *bp_history);
/**
* Restores the global branch history on a squash.
* @param bp_history Pointer to the BPHistory object that has the
* previous global branch history in it.
*/
void squash(void *bp_history);
/** Reads out the predictor's current globalHistory value. */
inline unsigned readGlobalHist()
{
    return globalHistory;
}
private:
/**
* Returns whether the branch should be taken, given a counter
* value.
* @param count The counter value.
*/
inline bool getPrediction(uint8_t &count);
/**
* Returns the local history index, given a branch address.
* @param branch_addr The branch's PC address.
*/
inline unsigned calcLocHistIdx(Addr &branch_addr);
inline void updateHistoriesTaken(unsigned local_history_idx);
/** Updates global history as taken. */
inline void updateGlobalHistTaken();
inline void updateHistoriesNotTaken(unsigned local_history_idx);
/** Updates global history as not taken. */
inline void updateGlobalHistNotTaken();
/**
* Updates local histories as taken.
* @param local_history_idx The local history table entry that
* will be updated.
*/
inline void updateLocalHistTaken(unsigned local_history_idx);
/**
* Updates local histories as not taken.
* @param local_history_idx The local history table entry that
* will be updated.
*/
inline void updateLocalHistNotTaken(unsigned local_history_idx);
/**
* The branch history information that is created upon predicting
* a branch. It will be passed back upon updating and squashing,
* when the BP can use this information to update/restore its
* state properly.
*/
struct BPHistory {
#ifdef DEBUG
    /**
     * Debug-only instance counter: incremented by the default
     * constructor and decremented by the destructor.
     */
    static int newCount;

    BPHistory()
    {
        ++newCount;
    }

    ~BPHistory()
    {
        --newCount;
    }
#endif
    /**
     * Global history value recorded for this branch. The uncondBr() and
     * squash() documentation describe it as the previous global history
     * that is restored on a squash.
     */
    unsigned globalHistory;
    /**
     * NOTE(review): presumably the local predictor's taken/not-taken
     * prediction for this branch -- confirm against lookup().
     */
    bool localPredTaken;
    /**
     * NOTE(review): presumably the global predictor's taken/not-taken
     * prediction for this branch -- confirm against lookup().
     */
    bool globalPredTaken;
    /**
     * NOTE(review): presumably true when the global predictor's result
     * was the one selected for the final prediction -- confirm against
     * lookup().
     */
    bool globalUsed;
};
/** Local counters. */
std::vector<SatCounter> localCtrs;
@@ -101,7 +175,6 @@ class TournamentBP
/** Mask to get the proper local history. */
unsigned localHistoryMask;
/** Array of counters that make up the global predictor. */
std::vector<SatCounter> globalCtrs;
@@ -120,7 +193,6 @@ class TournamentBP
/** Mask to get the proper global history. */
unsigned globalHistoryMask;
/** Array of counters that make up the choice predictor. */
std::vector<SatCounter> choiceCtrs;

View File

@@ -54,7 +54,7 @@ struct OzoneImpl {
// Would like to put these into their own area.
// typedef NullPredictor BranchPred;
typedef TwobitBPredUnit<OzoneImpl> BranchPred;
typedef BPredUnit<OzoneImpl> BranchPred;
typedef FrontEnd<OzoneImpl> FrontEnd;
// Will need IQ, LSQ eventually
typedef LWBackEnd<OzoneImpl> BackEnd;

View File

@@ -51,7 +51,7 @@ struct SimpleImpl {
// Would like to put these into their own area.
// typedef NullPredictor BranchPred;
typedef TwobitBPredUnit<SimpleImpl> BranchPred;
typedef BPredUnit<SimpleImpl> BranchPred;
typedef FrontEnd<SimpleImpl> FrontEnd;
// Will need IQ, LSQ eventually
typedef InorderBackEnd<SimpleImpl> BackEnd;

View File

@@ -1,4 +1,30 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
#define __CPU_OZONE_SIMPLE_PARAMS_HH__
@@ -29,7 +55,6 @@ class SimpleParams : public BaseCPU::Params
AlphaITB *itb; AlphaDTB *dtb;
#else
std::vector<Process *> workload;
// Process *process;
#endif // FULL_SYSTEM
//Page Table
@@ -103,6 +128,7 @@ class SimpleParams : public BaseCPU::Params
//
// Branch predictor (BP & BTB)
//
std::string predType;
unsigned localPredictorSize;
unsigned localCtrBits;
unsigned localHistoryTableSize;

View File

@@ -55,6 +55,7 @@ class DerivAlphaFullCPU(BaseCPU):
trapLatency = Param.Tick("Trap latency")
fetchTrapLatency = Param.Tick("Fetch trap latency")
predType = Param.String("Branch predictor type ('local', 'tournament')")
localPredictorSize = Param.Unsigned("Size of local predictor")
localCtrBits = Param.Unsigned("Bits per counter")
localHistoryTableSize = Param.Unsigned("Size of local history table")

View File

@@ -57,6 +57,7 @@ class DerivOzoneCPU(BaseCPU):
commitWidth = Param.Unsigned("Commit width")
squashWidth = Param.Unsigned("Squash width")
predType = Param.String("Type of branch predictor ('local', 'tournament')")
localPredictorSize = Param.Unsigned("Size of local predictor")
localCtrBits = Param.Unsigned("Bits per counter")
localHistoryTableSize = Param.Unsigned("Size of local history table")