Fix up code for initial release. The main bug that remains is that data is not properly forwarded from stores to loads, specifically when they are of differing sizes.

cpu/base_dyn_inst.cc:
    Remove unused commented out code.
cpu/base_dyn_inst.hh:
    Fix up comments.
cpu/beta_cpu/2bit_local_pred.cc:
    Reorder code to match header file.
cpu/beta_cpu/2bit_local_pred.hh:
    Update comments.
cpu/beta_cpu/alpha_dyn_inst.hh:
    Remove useless comments.
cpu/beta_cpu/alpha_dyn_inst_impl.hh:
cpu/beta_cpu/alpha_full_cpu_impl.hh:
cpu/beta_cpu/comm.hh:
cpu/beta_cpu/iew_impl.hh:
    Remove unused commented code.
cpu/beta_cpu/alpha_full_cpu.hh:
    Remove obsolete comment.
cpu/beta_cpu/alpha_impl.hh:
cpu/beta_cpu/full_cpu.hh:
    Alphabetize includes.
cpu/beta_cpu/bpred_unit.hh:
    Remove unused global history code.
cpu/beta_cpu/btb.hh:
cpu/beta_cpu/free_list.hh:
    Use full path in #defines.
cpu/beta_cpu/commit.hh:
cpu/beta_cpu/decode.hh:
    Reorder functions.
cpu/beta_cpu/commit_impl.hh:
    Remove obsolete commented code.
cpu/beta_cpu/fetch.hh:
    Remove obsolete comments.
cpu/beta_cpu/fetch_impl.hh:
cpu/beta_cpu/rename_impl.hh:
    Remove commented code.
cpu/beta_cpu/full_cpu.cc:
    Remove useless defines.
cpu/beta_cpu/inst_queue.hh:
    Use full path for #defines.
cpu/beta_cpu/inst_queue_impl.hh:
    Reorder functions to match header file.
cpu/beta_cpu/mem_dep_unit.hh:
    Use full path name for #defines.
cpu/beta_cpu/ras.hh:
    Use full path names for #defines.  Remove mod operation.
cpu/beta_cpu/regfile.hh:
    Remove unused commented code, fix up current comments.
cpu/beta_cpu/tournament_pred.cc:
cpu/beta_cpu/tournament_pred.hh:
    Update programming style.

--HG--
extra : convert_revision : fb9d18a853f58a1108ff827e3c123d5b52a0608a
This commit is contained in:
Kevin Lim
2005-05-19 01:28:25 -04:00
parent e5721ce677
commit c2fcac7c0d
29 changed files with 380 additions and 498 deletions

View File

@@ -63,11 +63,6 @@ typedef m5::hash_map<const BaseDynInst *, const BaseDynInst *, MyHashFunc> my_ha
my_hash_t thishash;
#endif
/** This may need to be specific to an implementation. */
//int BaseDynInst<Impl>::instcount = 0;
//int break_inst = -1;
template <class Impl>
BaseDynInst<Impl>::BaseDynInst(MachInst machInst, Addr inst_PC,
Addr pred_PC, InstSeqNum seq_num,
@@ -129,31 +124,11 @@ BaseDynInst<Impl>::initVars()
template <class Impl>
BaseDynInst<Impl>::~BaseDynInst()
{
/*
if (specMemWrite) {
// Remove effects of this instruction from speculative memory
xc->spec_mem->erase(effAddr);
}
*/
--instcount;
DPRINTF(FullCPU, "DynInst: Instruction destroyed. Instcount=%i\n",
instcount);
}
/*
template <class Impl>
FunctionalMemory *
BaseDynInst<Impl>::getMemory(void)
{
return xc->mem;
}
template <class Impl>
IntReg *
BaseDynInst<Impl>::getIntegerRegs(void)
{
return (spec_mode ? xc->specIntRegFile : xc->regs.intRegFile);
}
*/
template <class Impl>
void
BaseDynInst<Impl>::prefetch(Addr addr, unsigned flags)
@@ -369,8 +344,6 @@ BaseDynInst<Impl>::eaSrcsReady()
// EA calc depends on. (i.e. src reg 0 is the source of the data to be
// stored)
// StaticInstPtr<ISA> eaInst = staticInst->eaCompInst();
for (int i = 1; i < numSrcRegs(); ++i)
{
if (!_readySrcRegIdx[i])
@@ -380,7 +353,7 @@ BaseDynInst<Impl>::eaSrcsReady()
return true;
}
// Forward declaration...
// Forward declaration
template class BaseDynInst<AlphaSimpleImpl>;
template <>

View File

@@ -78,6 +78,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
MaxInstDestRegs = ISA::MaxInstDestRegs, //< Max dest regs
};
/** The static inst used by this dyn inst. */
StaticInstPtr<ISA> staticInst;
////////////////////////////////////////////
@@ -99,7 +100,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
Fault copySrcTranslate(Addr src);
Fault copy(Addr dest);
// Probably should be private...
/** @todo: Consider making this private. */
public:
/** Is this instruction valid. */
bool valid;
@@ -219,6 +220,7 @@ class BaseDynInst : public FastAlloc, public RefCounted
~BaseDynInst();
private:
/** Function to initialize variables in the constructors. */
void initVars();
public:
@@ -244,9 +246,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
*/
bool doneTargCalc() { return false; }
/** Returns the calculated target of the branch. */
// Addr readCalcTarg() { return nextPC; }
/** Returns the next PC. This could be the speculative next PC if it is
* called prior to the actual branch target being calculated.
*/
Addr readNextPC() { return nextPC; }
/** Set the predicted target of this current instruction. */
@@ -294,7 +296,10 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Returns the branch target address. */
Addr branchTarget() const { return staticInst->branchTarget(PC); }
/** Number of source registers. */
int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
/** Number of destination registers. */
int8_t numDestRegs() const { return staticInst->numDestRegs(); }
// the following are used to track physical register usage
@@ -314,8 +319,13 @@ class BaseDynInst : public FastAlloc, public RefCounted
return staticInst->srcRegIdx(i);
}
/** Returns the result of an integer instruction. */
uint64_t readIntResult() { return instResult.integer; }
/** Returns the result of a floating point instruction. */
float readFloatResult() { return instResult.fp; }
/** Returns the result of a floating point (double) instruction. */
double readDoubleResult() { return instResult.dbl; }
//Push to .cc file.
@@ -328,6 +338,9 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
}
/** Marks a specific register as ready.
* @todo: Move this to .cc file.
*/
void markSrcRegReady(RegIndex src_idx)
{
++readyRegs;
@@ -339,13 +352,16 @@ class BaseDynInst : public FastAlloc, public RefCounted
}
}
/** Returns if a source register is ready. */
bool isReadySrcRegIdx(int idx) const
{
return this->_readySrcRegIdx[idx];
}
/** Sets this instruction as completed. */
void setCompleted() { completed = true; }
/** Returns whether or not this instruction is completed. */
bool isCompleted() const { return completed; }
/** Sets this instruction as ready to issue. */
@@ -393,20 +409,39 @@ class BaseDynInst : public FastAlloc, public RefCounted
/** Set the next PC of this instruction (its actual target). */
void setNextPC(uint64_t val) { nextPC = val; }
/** Returns the exec context.
* @todo: Remove this once the ExecContext is no longer used.
*/
ExecContext *xcBase() { return xc; }
private:
/** Instruction effective address.
* @todo: Consider if this is necessary or not.
*/
Addr instEffAddr;
/** Whether or not the effective address calculation is completed.
* @todo: Consider if this is necessary or not.
*/
bool eaCalcDone;
public:
/** Sets the effective address. */
void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; }
/** Returns the effective address. */
const Addr &getEA() const { return instEffAddr; }
/** Returns whether or not the eff. addr. calculation has been completed. */
bool doneEACalc() { return eaCalcDone; }
/** Returns whether or not the eff. addr. source registers are ready. */
bool eaSrcsReady();
public:
/** Load queue index. */
int16_t lqIdx;
/** Store queue index. */
int16_t sqIdx;
};
@@ -439,8 +474,7 @@ BaseDynInst<Impl>::read(Addr addr, T &data, unsigned flags)
if (fault == No_Fault) {
fault = cpu->read(req, data, lqIdx);
}
else {
} else {
// Return a fixed value to keep simulation deterministic even
// along misspeculated paths.
data = (T)-1;
@@ -464,9 +498,6 @@ BaseDynInst<Impl>::write(T data, Addr addr, unsigned flags, uint64_t *res)
traceData->setData(data);
}
// storeSize = sizeof(T);
// storeData = data;
MemReqPtr req = new MemReq(addr, xc, sizeof(T), flags);
req->asid = asid;

View File

@@ -30,21 +30,6 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
instShiftAmt);
}
inline
bool
DefaultBP::getPrediction(uint8_t &count)
{
// Get the MSB of the count
return (count >> (localCtrBits - 1));
}
inline
unsigned
DefaultBP::getLocalIndex(Addr &branch_addr)
{
return (branch_addr >> instShiftAmt) & indexMask;
}
bool
DefaultBP::lookup(Addr &branch_addr)
{
@@ -91,15 +76,26 @@ DefaultBP::update(Addr &branch_addr, bool taken)
assert(local_predictor_idx < localPredictorSize);
// Increment or decrement twice to undo speculative update, then
// properly update
if (taken) {
DPRINTF(Fetch, "Branch predictor: Branch updated as taken.\n");
localCtrs[local_predictor_idx].increment();
// localCtrs[local_predictor_idx].increment();
} else {
DPRINTF(Fetch, "Branch predictor: Branch updated as not taken.\n");
localCtrs[local_predictor_idx].decrement();
// localCtrs[local_predictor_idx].decrement();
}
}
inline
bool
DefaultBP::getPrediction(uint8_t &count)
{
// Get the MSB of the count
return (count >> (localCtrBits - 1));
}
inline
unsigned
DefaultBP::getLocalIndex(Addr &branch_addr)
{
return (branch_addr >> instShiftAmt) & indexMask;
}

View File

@@ -31,8 +31,12 @@ class DefaultBP
private:
/** Returns the taken/not taken prediction given the value of the
* counter.
*/
inline bool getPrediction(uint8_t &count);
/** Calculates the local index based on the PC. */
inline unsigned getLocalIndex(Addr &PC);
/** Array of counters that make up the local predictor. */

View File

@@ -1,5 +1,3 @@
//Todo:
#ifndef __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
#define __CPU_BETA_CPU_ALPHA_DYN_INST_HH__
@@ -123,6 +121,7 @@ class AlphaDynInst : public BaseDynInst<Impl>
{
return this->cpu->readFloatRegInt(_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/

View File

@@ -130,7 +130,6 @@ void
AlphaDynInst<Impl>::syscall()
{
this->cpu->syscall(this->threadNumber);
// this->cpu->syscall();
}
#endif

View File

@@ -103,6 +103,9 @@ class AlphaFullCPU : public FullBetaCPU<Impl>
this->regFile.setFpcr(val);
}
// Most of the full system code and syscall emulation is not yet
// implemented. These functions do show what the final interface will
// look like.
#ifdef FULL_SYSTEM
uint64_t *getIpr();
uint64_t readIpr(int idx, Fault &fault);

View File

@@ -71,8 +71,8 @@ AlphaFullCPU<Impl>::syscall(short thread_num)
// Copy over all important state to xc once all the unrolling is done.
copyToXC();
// This is hardcoded to thread 0 while the CPU is only single threaded.
this->thread[0]->syscall();
// this->thread[thread_num]->syscall();
// Copy over all important state back to CPU.
copyFromXC();
@@ -355,15 +355,6 @@ AlphaFullCPU<Impl>::swapPALShadow(bool use_shadow)
// Will have to lookup in rename map to get physical registers, then
// swap.
/*
for (int i = 0; i < AlphaISA::NumIntRegs; i++) {
if (reg_redir[i]) {
AlphaISA::IntReg temp = regs->intRegFile[i];
regs->intRegFile[i] = regs->palregs[i];
regs->palregs[i] = temp;
}
}
*/
}
#endif // FULL_SYSTEM

View File

@@ -3,8 +3,8 @@
#include "arch/alpha/isa_traits.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
#include "cpu/beta_cpu/alpha_params.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
// Forward declarations.
template <class Impl>

View File

@@ -36,19 +36,16 @@ class TwobitBPredUnit
bool predict(DynInstPtr &inst, Addr &PC);
void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
bool actually_taken);
void update(const InstSeqNum &done_sn);
void squash(const InstSeqNum &squashed_sn);
void update(const InstSeqNum &done_sn);
void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
bool actually_taken);
bool BPLookup(Addr &inst_PC)
{ return BP.lookup(inst_PC); }
unsigned BPReadGlobalHist()
{ return 0; }
bool BTBValid(Addr &inst_PC)
{ return BTB.valid(inst_PC); }
@@ -56,7 +53,7 @@ class TwobitBPredUnit
{ return BTB.lookup(inst_PC); }
// Will want to include global history.
void BPUpdate(Addr &inst_PC, unsigned global_history, bool taken)
void BPUpdate(Addr &inst_PC, bool taken)
{ BP.update(inst_PC, taken); }
void BTBUpdate(Addr &inst_PC, Addr &target_PC)

View File

@@ -1,5 +1,5 @@
#ifndef __BTB_HH__
#define __BTB_HH__
#ifndef __CPU_BETA_CPU_BTB_HH__
#define __CPU_BETA_CPU_BTB_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
@@ -49,4 +49,4 @@ class DefaultBTB
unsigned tagShiftAmt;
};
#endif // __BTB_HH__
#endif // __CPU_BETA_CPU_BTB_HH__

View File

@@ -3,6 +3,7 @@
#include <stdint.h>
#include <vector>
#include "arch/alpha/isa_traits.hh"
#include "cpu/inst_seq.hh"
@@ -112,11 +113,6 @@ struct TimeBufStruct {
uint64_t mispredPC;
uint64_t nextPC;
// Think of better names here.
// Will need to be a variety of sizes...
// Maybe make it a vector, that way only need one object.
// std::vector<PhysRegIndex> freeRegs;
bool robSquashing;
// Represents the instruction that has either been retired or
@@ -124,9 +120,8 @@ struct TimeBufStruct {
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
// Extra bits of information so that the LDSTQ only updates when it
// Extra bit of information so that the LDSTQ only updates when it
// needs to.
// bool commitIsStore;
bool commitIsLoad;
// Communication specifically to the IQ to tell the IQ that it can

View File

@@ -72,10 +72,6 @@ class SimpleCommit
void commit();
uint64_t readCommitPC();
void setSquashing() { _status = ROBSquashing; }
private:
void commitInsts();
@@ -86,6 +82,12 @@ class SimpleCommit
void markCompletedInsts();
public:
uint64_t readCommitPC();
void setSquashing() { _status = ROBSquashing; }
private:
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
@@ -113,9 +115,6 @@ class SimpleCommit
/** Pointer to FullCPU. */
FullCPU *cpu;
//Store buffer interface? Will need to move committed stores to the
//store buffer
/** Memory interface. Used for d-cache accesses. */
MemInterface *dcacheInterface;

View File

@@ -1,10 +1,3 @@
// @todo: Bug when something reaches execute, and mispredicts, but is never
// put into the ROB because the ROB is full. Need rename stage to predict
// the free ROB entries better.
#ifndef __COMMIT_IMPL_HH__
#define __COMMIT_IMPL_HH__
#include "base/timebuf.hh"
#include "cpu/beta_cpu/commit.hh"
#include "cpu/exetrace.hh"
@@ -274,13 +267,6 @@ SimpleCommit<Impl>::commitInsts()
// time. However, we need to avoid updating any other state
// incorrectly if it's already been squashed.
if (head_inst->isSquashed()) {
// Hack to avoid the instruction being retired (and deleted) if
// it hasn't been through the IEW stage yet.
/*
if (!head_inst->isExecuted()) {
break;
}
*/
DPRINTF(Commit, "Commit: Retiring squashed instruction from "
"ROB.\n");
@@ -418,21 +404,6 @@ SimpleCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
++commitCommittedBranches;
}
#if 0
// Explicit communication back to the LDSTQ that a load has been committed
// and can be removed from the LDSTQ. Stores don't need this because
// the LDSTQ will already have been told that a store has reached the head
// of the ROB. Consider including communication if it's a store as well
// to keep things orthogonal.
if (head_inst->isMemRef()) {
++commitCommittedMemRefs;
if (head_inst->isLoad()) {
toIEW->commitInfo.commitIsLoad = true;
++commitCommittedLoads;
}
}
#endif
// Now that the instruction is going to be committed, finalize its
// trace data.
if (head_inst->traceData) {
@@ -501,5 +472,3 @@ SimpleCommit<Impl>::readCommitPC()
{
return rob->readHeadPC();
}
#endif // __COMMIT_IMPL_HH__

View File

@@ -64,9 +64,6 @@ class SimpleDecode
void decode();
// Might want to make squash a friend function.
void squash();
private:
inline bool fetchInstsValid();
@@ -76,8 +73,11 @@ class SimpleDecode
void squash(DynInstPtr &inst);
void dumpFetchQueue();
public:
// Might want to make squash a friend function.
void squash();
private:
// Interfaces to objects outside of decode.
/** CPU interface. */
FullCPU *cpu;
@@ -113,7 +113,6 @@ class SimpleDecode
/** Skid buffer between fetch and decode. */
std::queue<FetchStruct> skidBuffer;
private:
//Consider making these unsigned to avoid any confusion.
/** Rename to decode delay, in ticks. */
unsigned renameToDecodeDelay;

View File

@@ -1,15 +1,9 @@
// Todo: add in statistics, only get the MachInst and let decode actually
// decode, think about SMT fetch,
// fix up branch prediction stuff into one thing,
// Figure out where to advance time buffer. Add a way to get a
// stage's current status.
// Todo: SMT fetch,
// Add a way to get a stage's current status.
#ifndef __CPU_BETA_CPU_SIMPLE_FETCH_HH__
#define __CPU_BETA_CPU_SIMPLE_FETCH_HH__
//Will want to include: time buffer, structs, MemInterface, Event,
//whatever class bzero uses, MemReqPtr
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/pc_event.hh"
@@ -56,6 +50,19 @@ class SimpleFetch
bool stalled;
public:
class CacheCompletionEvent : public Event
{
private:
SimpleFetch *fetch;
public:
CacheCompletionEvent(SimpleFetch *_fetch);
virtual void process();
virtual const char *description();
};
public:
/** SimpleFetch constructor. */
SimpleFetch(Params &params);
@@ -68,20 +75,9 @@ class SimpleFetch
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
void tick();
void fetch();
void processCacheCompletion();
// Figure out PC vs next PC and how it should be updated
void squash(const Addr &new_PC);
private:
inline void doSquash(const Addr &new_PC);
void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
/**
* Looks up in the branch predictor to see if the next PC should be
* either next PC+=MachInst or a branch target.
@@ -101,6 +97,18 @@ class SimpleFetch
*/
Fault fetchCacheLine(Addr fetch_PC);
inline void doSquash(const Addr &new_PC);
void squashFromDecode(const Addr &new_PC, const InstSeqNum &seq_num);
public:
// Figure out PC vs next PC and how it should be updated
void squash(const Addr &new_PC);
void tick();
void fetch();
// Align an address (typically a PC) to the start of an I-cache block.
// We fold in the PISA 64- to 32-bit conversion here as well.
Addr icacheBlockAlignPC(Addr addr)
@@ -109,21 +117,6 @@ class SimpleFetch
return (addr & ~(cacheBlkMask));
}
public:
class CacheCompletionEvent : public Event
{
private:
SimpleFetch *fetch;
public:
CacheCompletionEvent(SimpleFetch *_fetch);
virtual void process();
virtual const char *description();
};
// CacheCompletionEvent cacheCompletionEvent;
private:
/** Pointer to the FullCPU. */
FullCPU *cpu;

View File

@@ -35,8 +35,7 @@ SimpleFetch<Impl>::CacheCompletionEvent::description()
template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
: //cacheCompletionEvent(this),
icacheInterface(params.icacheInterface),
: icacheInterface(params.icacheInterface),
branchPred(params),
decodeToFetchDelay(params.decodeToFetchDelay),
renameToFetchDelay(params.renameToFetchDelay),
@@ -254,7 +253,6 @@ SimpleFetch<Impl>::fetchCacheLine(Addr fetch_PC)
// up this stage once the cache miss completes.
if (result != MA_HIT && icacheInterface->doEvents()) {
memReq->completionEvent = new CacheCompletionEvent(this);
// lastIcacheStall = curTick;
// How does current model work as far as individual
// stages scheduling/unscheduling?

View File

@@ -1,13 +1,13 @@
#ifndef __FREE_LIST_HH__
#define __FREE_LIST_HH__
#ifndef __CPU_BETA_CPU_FREE_LIST_HH__
#define __CPU_BETA_CPU_FREE_LIST_HH__
#include <iostream>
#include <queue>
#include "arch/alpha/isa_traits.hh"
#include "cpu/beta_cpu/comm.hh"
#include "base/traceflags.hh"
#include "base/trace.hh"
#include "base/traceflags.hh"
#include "cpu/beta_cpu/comm.hh"
/**
* FreeList class that simply holds the list of free integer and floating
@@ -25,8 +25,6 @@
*/
class SimpleFreeList
{
public:
private:
/** The list of free integer registers. */
std::queue<PhysRegIndex> freeIntRegs;
@@ -60,15 +58,15 @@ class SimpleFreeList
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs);
PhysRegIndex getIntReg();
inline PhysRegIndex getIntReg();
PhysRegIndex getFloatReg();
inline PhysRegIndex getFloatReg();
void addReg(PhysRegIndex freed_reg);
inline void addReg(PhysRegIndex freed_reg);
void addIntReg(PhysRegIndex freed_reg);
inline void addIntReg(PhysRegIndex freed_reg);
void addFloatReg(PhysRegIndex freed_reg);
inline void addFloatReg(PhysRegIndex freed_reg);
bool hasFreeIntRegs()
{ return !freeIntRegs.empty(); }
@@ -166,4 +164,4 @@ SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
freeFloatRegs.push(freed_reg);
}
#endif // __FREE_LIST_HH__
#endif // __CPU_BETA_CPU_FREE_LIST_HH__

View File

@@ -1,6 +1,3 @@
#ifndef __SIMPLE_FULL_CPU_CC__
#define __SIMPLE_FULL_CPU_CC__
#ifdef FULL_SYSTEM
#include "sim/system.hh"
#else
@@ -528,5 +525,3 @@ FullBetaCPU<Impl>::wakeDependents(DynInstPtr &inst)
// Forward declaration of FullBetaCPU.
template class FullBetaCPU<AlphaSimpleImpl>;
#endif // __SIMPLE_FULL_CPU_HH__

View File

@@ -12,13 +12,12 @@
#include <list>
#include <vector>
#include "cpu/beta_cpu/comm.hh"
#include "base/statistics.hh"
#include "base/timebuf.hh"
#include "cpu/base_cpu.hh"
#include "cpu/exec_context.hh"
#include "cpu/beta_cpu/comm.hh"
#include "cpu/beta_cpu/cpu_policy.hh"
#include "cpu/exec_context.hh"
#include "sim/process.hh"
#ifdef FULL_SYSTEM
@@ -96,15 +95,15 @@ class FullBetaCPU : public BaseFullCPU
}
public:
void tick();
FullBetaCPU(Params &params);
~FullBetaCPU();
void init();
void fullCPURegStats();
void tick();
void init();
void activateContext(int thread_num, int delay);
void suspendContext(int thread_num);
void deallocateContext(int thread_num);

View File

@@ -361,20 +361,7 @@ SimpleIEW<Impl>::dispatchInsts()
} else if (inst->isStore()) {
ldstQueue.insertStore(inst);
// A bit of a hack. Set that it can commit so that
// the commit stage will try committing it, and then
// once commit realizes it's a store it will send back
// a signal to this stage to issue and execute that
// store. Change to be a bit that says the instruction
// has extra work to do at commit.
// inst->setCanCommit();
// instQueue.insertNonSpec(inst);
++iewDispStoreInsts;
// ++iewDispNonSpecInsts;
// continue;
} else if (inst->isNonSpeculative()) {
DPRINTF(IEW, "IEW: Issue: Nonspeculative instruction "
"encountered, skipping.\n");
@@ -404,8 +391,6 @@ SimpleIEW<Impl>::dispatchInsts()
DPRINTF(IEW, "IEW: Issue: Executed branch encountered, "
"skipping.\n");
// assert(inst->isDirectCtrl());
inst->setIssued();
inst->setCanCommit();
@@ -614,10 +599,6 @@ SimpleIEW<Impl>::tick()
}
++iewSquashCycles;
// Also should advance its own time buffers if the stage ran.
// Not sure about this...
// issueToExecQueue.advance();
} else if (_status == Blocked) {
// Continue to tell previous stage to stall.
toRename->iewInfo.stall = true;
@@ -654,14 +635,11 @@ SimpleIEW<Impl>::tick()
// or store to commit. Also check if it's being told to execute a
// nonspeculative instruction.
// This is pretty inefficient...
// if (0/*fromCommit->commitInfo.commitIsStore*/) {
if (!fromCommit->commitInfo.squash &&
!fromCommit->commitInfo.robSquashing) {
ldstQueue.commitStores(fromCommit->commitInfo.doneSeqNum);
// } else if (fromCommit->commitInfo.commitIsLoad) {
ldstQueue.commitLoads(fromCommit->commitInfo.doneSeqNum);
}
// }
if (fromCommit->commitInfo.nonSpecSeqNum != 0) {
instQueue.scheduleNonSpec(fromCommit->commitInfo.nonSpecSeqNum);

View File

@@ -1,5 +1,5 @@
#ifndef __INST_QUEUE_HH__
#define __INST_QUEUE_HH__
#ifndef __CPU_BETA_CPU_INST_QUEUE_HH__
#define __CPU_BETA_CPU_INST_QUEUE_HH__
#include <list>
#include <map>
@@ -103,19 +103,6 @@ class InstructionQueue
void stopSquash();
/** Debugging function to dump all the list sizes, as well as print
* out the list of nonspeculative instructions. Should not be used
* in any other capacity, but it has no harmful side effects.
*/
void dumpLists();
private:
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function
* during normal execution.
*/
int countInsts();
private:
/** Pointer to the CPU. */
FullCPU *cpu;
@@ -157,9 +144,6 @@ class InstructionQueue
/** List of ready branch instructions. */
ReadyInstQueue readyBranchInsts;
/** List of ready memory instructions. */
// ReadyInstQueue readyMemInsts;
/** List of ready miscellaneous instructions. */
ReadyInstQueue readyMiscInsts;
@@ -281,10 +265,26 @@ class InstructionQueue
bool addToDependents(DynInstPtr &new_inst);
void insertDependency(DynInstPtr &new_inst);
void createDependency(DynInstPtr &new_inst);
void dumpDependGraph();
void addIfReady(DynInstPtr &inst);
private:
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function
* during normal execution.
*/
int countInsts();
/** Debugging function to dump out the dependency graph.
*/
void dumpDependGraph();
/** Debugging function to dump all the list sizes, as well as print
* out the list of nonspeculative instructions. Should not be used
* in any other capacity, but it has no harmful side effects.
*/
void dumpLists();
Stats::Scalar<> iqInstsAdded;
Stats::Scalar<> iqNonSpecInstsAdded;
// Stats::Scalar<> iqIntInstsAdded;
@@ -305,4 +305,4 @@ class InstructionQueue
};
#endif //__INST_QUEUE_HH__
#endif //__CPU_BETA_CPU_INST_QUEUE_HH__

View File

@@ -1,6 +1,3 @@
#ifndef __INST_QUEUE_IMPL_HH__
#define __INST_QUEUE_IMPL_HH__
// Todo:
// Current ordering allows for 0 cycle added-to-scheduled. Could maybe fake
// it; either do in reverse order, or have added instructions put into a
@@ -171,6 +168,13 @@ InstructionQueue<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
fromCommit = timeBuffer->getWire(-commitToIEWDelay);
}
template <class Impl>
unsigned
InstructionQueue<Impl>::numFreeEntries()
{
return freeEntries;
}
// Might want to do something more complex if it knows how many instructions
// will be issued this cycle.
template <class Impl>
@@ -184,13 +188,6 @@ InstructionQueue<Impl>::isFull()
}
}
template <class Impl>
unsigned
InstructionQueue<Impl>::numFreeEntries()
{
return freeEntries;
}
template <class Impl>
void
InstructionQueue<Impl>::insert(DynInstPtr &new_inst)
@@ -562,7 +559,6 @@ InstructionQueue<Impl>::scheduleReadyInsts()
break;
case Squashed:
// issuing_inst = squashed_head_inst;
assert(0 && "Squashed insts should not issue any more!");
squashedInsts.pop();
// Set the squashed instruction as able to commit so that commit
@@ -619,6 +615,77 @@ InstructionQueue<Impl>::scheduleNonSpec(const InstSeqNum &inst)
nonSpecInsts.erase(inst_it);
}
template <class Impl>
void
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
{
DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
//Look at the physical destination register of the DynInst
//and look it up on the dependency graph. Then mark as ready
//any instructions within the instruction queue.
DependencyEntry *curr;
// Tell the memory dependence unit to wake any dependents on this
// instruction if it is a memory instruction.
if (completed_inst->isMemRef()) {
memDepUnit.wakeDependents(completed_inst);
}
for (int dest_reg_idx = 0;
dest_reg_idx < completed_inst->numDestRegs();
dest_reg_idx++)
{
PhysRegIndex dest_reg =
completed_inst->renamedDestRegIdx(dest_reg_idx);
// Special case of uniq or control registers. They are not
// handled by the IQ and thus have no dependency graph entry.
// @todo Figure out a cleaner way to handle this.
if (dest_reg >= numPhysRegs) {
continue;
}
DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
(int) dest_reg);
//Maybe abstract this part into a function.
//Go through the dependency chain, marking the registers as ready
//within the waiting instructions.
while (dependGraph[dest_reg].next) {
curr = dependGraph[dest_reg].next;
DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
curr->inst->readPC());
// Might want to give more information to the instruction
// so that it knows which of its source registers is ready.
// However that would mean that the dependency graph entries
// would need to hold the src_reg_idx.
curr->inst->markSrcRegReady();
addIfReady(curr->inst);
dependGraph[dest_reg].next = curr->next;
DependencyEntry::mem_alloc_counter--;
curr->inst = NULL;
delete curr;
}
// Reset the head node now that all of its dependents have been woken
// up.
dependGraph[dest_reg].next = NULL;
dependGraph[dest_reg].inst = NULL;
// Mark the scoreboard as having that register ready.
regScoreboard[dest_reg] = true;
}
}
template <class Impl>
void
InstructionQueue<Impl>::violation(DynInstPtr &store,
@@ -747,73 +814,56 @@ InstructionQueue<Impl>::stopSquash()
template <class Impl>
void
InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
{
DPRINTF(IQ, "IQ: Waking dependents of completed instruction.\n");
//Look at the physical destination register of the DynInst
//and look it up on the dependency graph. Then mark as ready
//any instructions within the instruction queue.
DependencyEntry *curr;
//Add this new, dependent instruction at the head of the dependency
//chain.
// Tell the memory dependence unit to wake any dependents on this
// instruction if it is a memory instruction.
// First create the entry that will be added to the head of the
// dependency chain.
DependencyEntry *new_entry = new DependencyEntry;
new_entry->next = this->next;
new_entry->inst = new_inst;
if (completed_inst->isMemRef()) {
memDepUnit.wakeDependents(completed_inst);
// Then actually add it to the chain.
this->next = new_entry;
++mem_alloc_counter;
}
template <class Impl>
void
InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
{
DependencyEntry *prev = this;
DependencyEntry *curr = this->next;
// Make sure curr isn't NULL. Because this instruction is being
// removed from a dependency list, it must have been placed there at
// an earlier time. The dependency chain should not be empty,
// unless the instruction dependent upon it is already ready.
if (curr == NULL) {
return;
}
for (int dest_reg_idx = 0;
dest_reg_idx < completed_inst->numDestRegs();
dest_reg_idx++)
// Find the instruction to remove within the dependency linked list.
while(curr->inst != inst_to_remove)
{
PhysRegIndex dest_reg =
completed_inst->renamedDestRegIdx(dest_reg_idx);
prev = curr;
curr = curr->next;
// Special case of uniq or control registers. They are not
// handled by the IQ and thus have no dependency graph entry.
// @todo Figure out a cleaner way to handle this.
if (dest_reg >= numPhysRegs) {
continue;
}
DPRINTF(IQ, "IQ: Waking any dependents on register %i.\n",
(int) dest_reg);
//Maybe abstract this part into a function.
//Go through the dependency chain, marking the registers as ready
//within the waiting instructions.
while (dependGraph[dest_reg].next) {
curr = dependGraph[dest_reg].next;
DPRINTF(IQ, "IQ: Waking up a dependent instruction, PC%#x.\n",
curr->inst->readPC());
// Might want to give more information to the instruction
// so that it knows which of its source registers is ready.
// However that would mean that the dependency graph entries
// would need to hold the src_reg_idx.
curr->inst->markSrcRegReady();
addIfReady(curr->inst);
dependGraph[dest_reg].next = curr->next;
DependencyEntry::mem_alloc_counter--;
curr->inst = NULL;
delete curr;
}
// Reset the head node now that all of its dependents have been woken
// up.
dependGraph[dest_reg].next = NULL;
dependGraph[dest_reg].inst = NULL;
// Mark the scoreboard as having that register ready.
regScoreboard[dest_reg] = true;
assert(curr != NULL);
}
// Now remove this instruction from the list.
prev->next = curr->next;
--mem_alloc_counter;
// Could push this off to the destructor of DependencyEntry
curr->inst = NULL;
delete curr;
}
template <class Impl>
@@ -898,60 +948,6 @@ InstructionQueue<Impl>::createDependency(DynInstPtr &new_inst)
}
}
template <class Impl>
void
InstructionQueue<Impl>::DependencyEntry::insert(DynInstPtr &new_inst)
{
//Add this new, dependent instruction at the head of the dependency
//chain.
// First create the entry that will be added to the head of the
// dependency chain.
DependencyEntry *new_entry = new DependencyEntry;
new_entry->next = this->next;
new_entry->inst = new_inst;
// Then actually add it to the chain.
this->next = new_entry;
++mem_alloc_counter;
}
template <class Impl>
void
InstructionQueue<Impl>::DependencyEntry::remove(DynInstPtr &inst_to_remove)
{
    // Unlink and free the entry that holds inst_to_remove from the chain
    // hanging off this entry.  Does nothing if the chain is empty.
    DependencyEntry *prev = this;
    DependencyEntry *curr = this->next;

    // Make sure curr isn't NULL.  Because this instruction is being
    // removed from a dependency list, it must have been placed there at
    // an earlier time.  The dependency chain should not be empty,
    // unless the instruction dependent upon it is already ready.
    if (curr == NULL) {
        return;
    }

    // Find the instruction to remove within the dependency linked list.
    while (curr->inst != inst_to_remove) {
        prev = curr;
        curr = curr->next;

        // Running off the end of a non-empty chain means the instruction
        // was never inserted here; that is a caller error, so trap it in
        // debug builds.
        assert(curr != NULL);

        // With NDEBUG the assert above compiles away, and the loop
        // condition would then dereference a null pointer.  Bail out
        // instead, leaving the chain untouched.
        if (curr == NULL) {
            return;
        }
    }

    // Now remove this instruction from the list.
    prev->next = curr->next;

    // Balances the increment done by insert() when the entry was created.
    --mem_alloc_counter;

    // Drop the entry's reference to the instruction before freeing it.
    // Could push this off to the destructor of DependencyEntry.
    curr->inst = NULL;
    delete curr;
}
template <class Impl>
void
InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
@@ -1090,8 +1086,6 @@ InstructionQueue<Impl>::dumpLists()
cprintf("Ready branch list size: %i\n", readyBranchInsts.size());
// cprintf("Ready memory list size: %i\n", readyMemInsts.size());
cprintf("Ready misc list size: %i\n", readyMiscInsts.size());
cprintf("Squashed list size: %i\n", squashedInsts.size());
@@ -1110,5 +1104,3 @@ InstructionQueue<Impl>::dumpLists()
cprintf("\n");
}
#endif // __INST_QUEUE_IMPL_HH__

View File

@@ -1,12 +1,12 @@
#ifndef __MEM_DEP_UNIT_HH__
#define __MEM_DEP_UNIT_HH__
#ifndef __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
#define __CPU_BETA_CPU_MEM_DEP_UNIT_HH__
#include <set>
#include <map>
#include <set>
#include "cpu/inst_seq.hh"
#include "base/statistics.hh"
#include "cpu/inst_seq.hh"
/**
* Memory dependency unit class. This holds the memory dependence predictor.
@@ -34,6 +34,12 @@ class MemDepUnit {
void insertNonSpec(DynInstPtr &inst);
// Will want to make this operation relatively fast. Right now it
// is somewhat slow.
DynInstPtr &top();
void pop();
void regsReady(DynInstPtr &inst);
void nonSpecInstReady(DynInstPtr &inst);
@@ -46,12 +52,6 @@ class MemDepUnit {
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
// Will want to make this operation relatively fast. Right now it
// kind of sucks.
DynInstPtr &top();
void pop();
inline bool empty()
{ return readyInsts.empty(); }
@@ -91,11 +91,8 @@ class MemDepUnit {
}
};
private:
inline void moveToReady(dep_it_t &woken_inst);
private:
/** List of instructions that have passed through rename, yet are still
* waiting on either a memory dependence to resolve or source registers to
* become available before they can issue.
@@ -137,4 +134,4 @@ class MemDepUnit {
Stats::Scalar<> conflictingStores;
};
#endif
#endif // __CPU_BETA_CPU_MEM_DEP_UNIT_HH__

View File

@@ -1,5 +1,5 @@
#ifndef __RAS_HH__
#define __RAS_HH__
#ifndef __CPU_BETA_CPU_RAS_HH__
#define __CPU_BETA_CPU_RAS_HH__
// For Addr type.
#include "arch/alpha/isa_traits.hh"
@@ -23,7 +23,7 @@ class ReturnAddrStack
private:
inline void incrTos()
{ tos = (tos + 1) % numEntries; }
{ if (++tos == numEntries) tos = 0; }
inline void decrTos()
{ tos = (tos == 0 ? numEntries - 1 : tos - 1); }
@@ -37,4 +37,4 @@ class ReturnAddrStack
unsigned tos;
};
#endif // __RAS_HH__
#endif // __CPU_BETA_CPU_RAS_HH__

View File

@@ -8,8 +8,8 @@
#include "cpu/beta_cpu/comm.hh"
#ifdef FULL_SYSTEM
#include "kern/kernel_stats.hh"
#include "arch/alpha/ev5.hh"
#include "kern/kernel_stats.hh"
using namespace EV5;
#endif
@@ -19,8 +19,6 @@ using namespace EV5;
// Things that are in the ifdef FULL_SYSTEM are pretty dependent on the ISA,
// and should go in the AlphaFullCPU.
extern void debug_break();
template <class Impl>
class PhysRegFile
{
@@ -203,8 +201,11 @@ class PhysRegFile
/** Miscellaneous register file. */
MiscRegFile miscRegs;
Addr pc; // program counter
Addr npc; // next-cycle program counter
/** Program counter. */
Addr pc;
/** Next-cycle program counter. */
Addr npc;
#ifdef FULL_SYSTEM
private:
@@ -408,7 +409,6 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
// write entire quad w/ no side-effect
old = ipr[idx];
ipr[idx] = val;
// kernelStats.context(old, val);
break;
case ISA::IPR_DTB_PTE:
@@ -435,14 +435,9 @@ PhysRegFile<Impl>::setIpr(int idx, uint64_t val)
// only write least significant five bits - interrupt level
ipr[idx] = val & 0x1f;
// kernelStats.swpipl(ipr[idx]);
break;
case ISA::IPR_DTB_CM:
// if (val & 0x18)
// kernelStats->mode(Kernel::user);
// else
// kernelStats->mode(Kernel::kernel);
case ISA::IPR_ICM:
// only write two mode bits - processor mode

View File

@@ -507,6 +507,7 @@ SimpleRename<Impl>::tick()
DPRINTF(Rename, "Rename: Done squashing, going to running.\n");
_status = Running;
rename();
} else {
doSquash();
}
@@ -523,25 +524,6 @@ SimpleRename<Impl>::tick()
#endif
}
// Perhaps put this outside of this function, since this will
// happen regardless of whether or not the stage is blocked or
// squashing.
// Read from the time buffer any necessary data.
// Read registers that are freed, and add them to the freelist.
// This is unnecessary due to the history buffer (assuming the history
// buffer works properly).
/*
while(!fromCommit->commitInfo.freeRegs.empty())
{
PhysRegIndex freed_reg = fromCommit->commitInfo.freeRegs.back();
DPRINTF(Rename, "Rename: Adding freed register %i to freelist.\n",
(int)freed_reg);
freeList->addReg(freed_reg);
fromCommit->commitInfo.freeRegs.pop_back();
}
*/
}
template<class Impl>

View File

@@ -10,52 +10,52 @@ TournamentBP::TournamentBP(unsigned _local_predictor_size,
unsigned _choice_predictor_size,
unsigned _choice_ctr_bits,
unsigned _instShiftAmt)
: local_predictor_size(_local_predictor_size),
local_ctr_bits(_local_ctr_bits),
local_history_table_size(_local_history_table_size),
local_history_bits(_local_history_bits),
global_predictor_size(_global_predictor_size),
global_ctr_bits(_global_ctr_bits),
global_history_bits(_global_history_bits),
choice_predictor_size(_global_predictor_size),
choice_ctr_bits(_choice_ctr_bits),
: localPredictorSize(_local_predictor_size),
localCtrBits(_local_ctr_bits),
localHistoryTableSize(_local_history_table_size),
localHistoryBits(_local_history_bits),
globalPredictorSize(_global_predictor_size),
globalCtrBits(_global_ctr_bits),
globalHistoryBits(_global_history_bits),
choicePredictorSize(_global_predictor_size),
choiceCtrBits(_choice_ctr_bits),
instShiftAmt(_instShiftAmt)
{
//Should do checks here to make sure sizes are correct (powers of 2)
//Setup the array of counters for the local predictor
local_ctrs = new SatCounter[local_predictor_size];
localCtrs = new SatCounter[localPredictorSize];
for (int i = 0; i < local_predictor_size; ++i)
local_ctrs[i].setBits(local_ctr_bits);
for (int i = 0; i < localPredictorSize; ++i)
localCtrs[i].setBits(localCtrBits);
//Setup the history table for the local table
local_history_table = new unsigned[local_history_table_size];
localHistoryTable = new unsigned[localHistoryTableSize];
for (int i = 0; i < local_history_table_size; ++i)
local_history_table[i] = 0;
for (int i = 0; i < localHistoryTableSize; ++i)
localHistoryTable[i] = 0;
// Setup the local history mask
localHistoryMask = (1 << local_history_bits) - 1;
localHistoryMask = (1 << localHistoryBits) - 1;
//Setup the array of counters for the global predictor
global_ctrs = new SatCounter[global_predictor_size];
globalCtrs = new SatCounter[globalPredictorSize];
for (int i = 0; i < global_predictor_size; ++i)
global_ctrs[i].setBits(global_ctr_bits);
for (int i = 0; i < globalPredictorSize; ++i)
globalCtrs[i].setBits(globalCtrBits);
//Clear the global history
global_history = 0;
globalHistory = 0;
// Setup the global history mask
globalHistoryMask = (1 << global_history_bits) - 1;
globalHistoryMask = (1 << globalHistoryBits) - 1;
//Setup the array of counters for the choice predictor
choice_ctrs = new SatCounter[choice_predictor_size];
choiceCtrs = new SatCounter[choicePredictorSize];
for (int i = 0; i < choice_predictor_size; ++i)
choice_ctrs[i].setBits(choice_ctr_bits);
for (int i = 0; i < choicePredictorSize; ++i)
choiceCtrs[i].setBits(choiceCtrBits);
threshold = (1 << (local_ctr_bits - 1)) - 1;
threshold = (1 << (localCtrBits - 1)) - 1;
threshold = threshold / 2;
}
@@ -63,29 +63,29 @@ inline
unsigned
TournamentBP::calcLocHistIdx(Addr &branch_addr)
{
return (branch_addr >> instShiftAmt) & (local_history_table_size - 1);
return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
}
inline
void
TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
{
global_history = (global_history << 1) | 1;
global_history = global_history & globalHistoryMask;
globalHistory = (globalHistory << 1) | 1;
globalHistory = globalHistory & globalHistoryMask;
local_history_table[local_history_idx] =
(local_history_table[local_history_idx] << 1) | 1;
localHistoryTable[local_history_idx] =
(localHistoryTable[local_history_idx] << 1) | 1;
}
inline
void
TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
{
global_history = (global_history << 1);
global_history = global_history & globalHistoryMask;
globalHistory = (globalHistory << 1);
globalHistory = globalHistory & globalHistoryMask;
local_history_table[local_history_idx] =
(local_history_table[local_history_idx] << 1);
localHistoryTable[local_history_idx] =
(localHistoryTable[local_history_idx] << 1);
}
bool
@@ -100,15 +100,15 @@ TournamentBP::lookup(Addr &branch_addr)
//Lookup in the local predictor to get its branch prediction
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = local_history_table[local_history_idx]
local_predictor_idx = localHistoryTable[local_history_idx]
& localHistoryMask;
local_prediction = local_ctrs[local_predictor_idx].read();
local_prediction = localCtrs[local_predictor_idx].read();
//Lookup in the global predictor to get its branch prediction
global_prediction = global_ctrs[global_history].read();
global_prediction = globalCtrs[globalHistory].read();
//Lookup in the choice predictor to see which one to use
choice_prediction = choice_ctrs[global_history].read();
choice_prediction = choiceCtrs[globalHistory].read();
//@todo Put a threshold value in for the three predictors that can
// be set through the constructor (so this isn't hard coded).
@@ -117,21 +117,21 @@ TournamentBP::lookup(Addr &branch_addr)
if (global_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
assert(global_history < global_predictor_size &&
local_history_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
global_ctrs[global_history].increment();
local_ctrs[local_history_idx].increment();
globalCtrs[globalHistory].increment();
localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
assert(global_history < global_predictor_size &&
local_history_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
global_ctrs[global_history].decrement();
local_ctrs[local_history_idx].decrement();
globalCtrs[globalHistory].decrement();
localCtrs[local_history_idx].decrement();
return false;
}
@@ -139,21 +139,21 @@ TournamentBP::lookup(Addr &branch_addr)
if (local_prediction > threshold) {
updateHistoriesTaken(local_history_idx);
assert(global_history < global_predictor_size &&
local_history_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
global_ctrs[global_history].increment();
local_ctrs[local_history_idx].increment();
globalCtrs[globalHistory].increment();
localCtrs[local_history_idx].increment();
return true;
} else {
updateHistoriesNotTaken(local_history_idx);
assert(global_history < global_predictor_size &&
local_history_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_history_idx < localPredictorSize);
global_ctrs[global_history].decrement();
local_ctrs[local_history_idx].decrement();
globalCtrs[globalHistory].decrement();
localCtrs[local_history_idx].decrement();
return false;
}
@@ -174,20 +174,20 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
bool global_pred_taken;
// Load the correct global history into the register.
global_history = correct_gh;
globalHistory = correct_gh;
// Get the local predictor's current prediction, remove the incorrect
// update, and update the local predictor
local_history_idx = calcLocHistIdx(branch_addr);
local_predictor_idx = local_history_table[local_history_idx];
local_predictor_idx = localHistoryTable[local_history_idx];
local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
local_prediction = local_ctrs[local_predictor_idx].read();
local_prediction = localCtrs[local_predictor_idx].read();
local_pred_taken = local_prediction > threshold;
//Get the global predictor's current prediction, and update the
//global predictor
global_prediction = global_ctrs[global_history].read();
global_prediction = globalCtrs[globalHistory].read();
global_pred_taken = global_prediction > threshold;
//Update the choice predictor to tell it which one was correct
@@ -195,34 +195,34 @@ TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
//If the local prediction matches the actual outcome, decrement
//the counter. Otherwise increment the counter.
if (local_pred_taken == taken) {
choice_ctrs[global_history].decrement();
choiceCtrs[globalHistory].decrement();
} else {
choice_ctrs[global_history].increment();
choiceCtrs[globalHistory].increment();
}
}
if (taken) {
assert(global_history < global_predictor_size &&
local_predictor_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_predictor_idx < localPredictorSize);
local_ctrs[local_predictor_idx].increment();
global_ctrs[global_history].increment();
localCtrs[local_predictor_idx].increment();
globalCtrs[globalHistory].increment();
global_history = (global_history << 1) | 1;
global_history = global_history & globalHistoryMask;
globalHistory = (globalHistory << 1) | 1;
globalHistory = globalHistory & globalHistoryMask;
local_history_table[local_history_idx] |= 1;
localHistoryTable[local_history_idx] |= 1;
}
else {
assert(global_history < global_predictor_size &&
local_predictor_idx < local_predictor_size);
assert(globalHistory < globalPredictorSize &&
local_predictor_idx < localPredictorSize);
local_ctrs[local_predictor_idx].decrement();
global_ctrs[global_history].decrement();
localCtrs[local_predictor_idx].decrement();
globalCtrs[globalHistory].decrement();
global_history = (global_history << 1);
global_history = global_history & globalHistoryMask;
globalHistory = (globalHistory << 1);
globalHistory = globalHistory & globalHistoryMask;
local_history_table[local_history_idx] &= ~1;
localHistoryTable[local_history_idx] &= ~1;
}
}

View File

@@ -37,7 +37,7 @@ class TournamentBP
*/
void update(Addr &branch_addr, unsigned global_history, bool taken);
inline unsigned readGlobalHist() { return global_history; }
inline unsigned readGlobalHist() { return globalHistory; }
private:
@@ -50,56 +50,56 @@ class TournamentBP
inline void updateHistoriesNotTaken(unsigned local_history_idx);
/** Local counters. */
SatCounter *local_ctrs;
SatCounter *localCtrs;
/** Size of the local predictor. */
unsigned local_predictor_size;
unsigned localPredictorSize;
/** Number of bits of the local predictor's counters. */
unsigned local_ctr_bits;
unsigned localCtrBits;
/** Array of local history table entries. */
unsigned *local_history_table;
unsigned *localHistoryTable;
/** Size of the local history table. */
unsigned local_history_table_size;
unsigned localHistoryTableSize;
/** Number of bits for each entry of the local history table.
* @todo Doesn't this come from the size of the local predictor?
*/
unsigned local_history_bits;
unsigned localHistoryBits;
/** Mask to get the proper local history. */
unsigned localHistoryMask;
/** Array of counters that make up the global predictor. */
SatCounter *global_ctrs;
SatCounter *globalCtrs;
/** Size of the global predictor. */
unsigned global_predictor_size;
unsigned globalPredictorSize;
/** Number of bits of the global predictor's counters. */
unsigned global_ctr_bits;
unsigned globalCtrBits;
/** Global history register. */
unsigned global_history;
unsigned globalHistory;
/** Number of bits for the global history. */
unsigned global_history_bits;
unsigned globalHistoryBits;
/** Mask to get the proper global history. */
unsigned globalHistoryMask;
/** Array of counters that make up the choice predictor. */
SatCounter *choice_ctrs;
SatCounter *choiceCtrs;
/** Size of the choice predictor (identical to the global predictor). */
unsigned choice_predictor_size;
unsigned choicePredictorSize;
/** Number of bits of the choice predictor's counters. */
unsigned choice_ctr_bits;
unsigned choiceCtrBits;
/** Number of bits to shift the instruction over to get rid of the word
* offset.