Implement BTB using the cache library (#1537)

This enables the BTB to be associative and use various replacement
policies.
This commit is contained in:
Pranith
2024-10-10 09:05:22 -07:00
committed by GitHub
parent 1c8ab47a54
commit 50f652a2ee
8 changed files with 394 additions and 133 deletions

View File

@@ -1683,6 +1683,15 @@ class HPI_MMU(ArmMMU):
class HPI_BTB(SimpleBTB):
    # BTB geometry: 128 entries with an associativity of 1.
    numEntries = 128
    tagBits = 18
    associativity = 1
    instShiftAmt = 2
    btbReplPolicy = LRURP()
    # The indexing policy takes all of its settings through Parent proxies,
    # so it follows the values assigned above.
    btbIndexingPolicy = BTBSetAssociative(
        assoc=Parent.associativity,
        num_entries=Parent.numEntries,
        set_shift=Parent.instShiftAmt,
        tag_bits=Parent.tagBits,
    )
class HPI_BP(TournamentBP):

View File

@@ -111,6 +111,15 @@ class O3_ARM_v7a_FUP(FUPool):
class O3_ARM_v7a_BTB(SimpleBTB):
    # BTB geometry: 2048 entries with an associativity of 1.
    numEntries = 2048
    tagBits = 18
    associativity = 1
    instShiftAmt = 2
    btbReplPolicy = LRURP()
    # The indexing policy takes all of its settings through Parent proxies,
    # so it follows the values assigned above.
    btbIndexingPolicy = BTBSetAssociative(
        assoc=Parent.associativity,
        num_entries=Parent.numEntries,
        set_shift=Parent.instShiftAmt,
        tag_bits=Parent.tagBits,
    )
# Bi-Mode Branch Predictor

View File

@@ -108,6 +108,15 @@ class ex5_big_FUP(FUPool):
class ex5_big_BTB(SimpleBTB):
    # BTB geometry: 4096 entries with an associativity of 1.
    numEntries = 4096
    tagBits = 18
    associativity = 1
    instShiftAmt = 2
    btbReplPolicy = LRURP()
    # The indexing policy takes all of its settings through Parent proxies,
    # so it follows the values assigned above.
    btbIndexingPolicy = BTBSetAssociative(
        assoc=Parent.associativity,
        num_entries=Parent.numEntries,
        set_shift=Parent.instShiftAmt,
        tag_bits=Parent.tagBits,
    )
# Bi-Mode Branch Predictor

View File

@@ -38,6 +38,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.objects.ClockedObject import ClockedObject
from m5.objects.IndexingPolicies import *
from m5.objects.ReplacementPolicies import *
from m5.params import *
from m5.proxy import *
from m5.SimObject import *
@@ -83,6 +85,38 @@ class BranchTargetBuffer(ClockedObject):
numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
class BTBIndexingPolicy(SimObject):
    # Abstract SimObject bound to the C++ IndexingPolicyTemplate instantiated
    # with gem5::BTBTagType; concrete BTB indexing policies derive from it.
    type = "BTBIndexingPolicy"
    abstract = True
    cxx_class = "gem5::IndexingPolicyTemplate<gem5::BTBTagType>"
    cxx_header = "cpu/pred/btb_entry.hh"
    cxx_template_params = ["class Types"]

    # Associativity; by default proxied from a parent attribute named "assoc".
    # NOTE(review): SimpleBTB calls its parameter "associativity" and passes
    # it explicitly, so this default presumably only resolves under a parent
    # that exposes "assoc" -- confirm before relying on it.
    assoc = Param.Int(Parent.assoc, "associativity")
class BTBSetAssociative(BTBIndexingPolicy):
    type = "BTBSetAssociative"
    cxx_class = "gem5::BTBSetAssociative"
    cxx_header = "cpu/pred/btb_entry.hh"

    # Number of BTB entries; by default proxied from the parent's numEntries.
    num_entries = Param.Unsigned(
        Parent.numEntries, "Number of entries in the BTB"
    )

    # PC bits dropped before indexing. The default of 2 ignores the low two
    # bits, which suits 4-byte instructions.
    set_shift = Param.Unsigned(2, "Number of bits to shift PC to get index")

    # Total width of the tag in bits. The tag sits above the index and
    # offset bits.
    tag_bits = Param.Unsigned(64, "number of bits in the tag")

    # Threads sharing the BTB; by default proxied from the parent's
    # numThreads.
    numThreads = Param.Unsigned(Parent.numThreads, "Number of threads")
class SimpleBTB(BranchTargetBuffer):
type = "SimpleBTB"
cxx_class = "gem5::branch_prediction::SimpleBTB"
@@ -93,6 +127,19 @@ class SimpleBTB(BranchTargetBuffer):
instShiftAmt = Param.Unsigned(
Parent.instShiftAmt, "Number of bits to shift instructions by"
)
associativity = Param.Unsigned(1, "BTB associativity")
btbReplPolicy = Param.BaseReplacementPolicy(
LRURP(), "BTB replacement policy"
)
btbIndexingPolicy = Param.BTBIndexingPolicy(
BTBSetAssociative(
assoc=Parent.associativity,
num_entries=Parent.numEntries,
set_shift=Parent.instShiftAmt,
numThreads=1,
),
"BTB indexing policy",
)
class IndirectPredictor(SimObject):

View File

@@ -45,7 +45,7 @@ SimObject('BranchPredictor.py',
sim_objects=[
'BranchPredictor',
'IndirectPredictor', 'SimpleIndirectPredictor',
'BranchTargetBuffer', 'SimpleBTB',
'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative',
'ReturnAddrStack',
'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor',
'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB',

288
src/cpu/pred/btb_entry.hh Normal file
View File

@@ -0,0 +1,288 @@
/*
* Copyright (c) 2024 Pranith Kumar
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @file
* Declaration of a BTB entry and BTB indexing policy.
*/
#ifndef __CPU_PRED_BTB_ENTRY_HH__
#define __CPU_PRED_BTB_ENTRY_HH__
#include <vector>
#include "arch/generic/pcstate.hh"
#include "base/intmath.hh"
#include "base/types.hh"
#include "cpu/static_inst.hh"
#include "mem/cache/replacement_policies/replaceable_entry.hh"
#include "mem/cache/tags/indexing_policies/base.hh"
#include "params/BTBIndexingPolicy.hh"
#include "params/BTBSetAssociative.hh"
namespace gem5 {
/**
 * Type bundle used to instantiate IndexingPolicyTemplate for the BTB.
 * It names the lookup key and the parameter struct of the indexing policy.
 */
class BTBTagType
{
  public:
    /** Lookup key: an instruction address paired with a thread id. */
    struct KeyType
    {
        Addr address;
        ThreadID tid;
    };

    /** Parameter struct of the BTB indexing policy SimObject. */
    using Params = BTBIndexingPolicyParams;
};
// Indexing-policy base type specialised for BTB keys (see BTBTagType).
using BTBIndexingPolicy = IndexingPolicyTemplate<BTBTagType>;
// Explicitly instantiate the indexing-policy template for the BTB tag type.
// NOTE(review): this is an explicit instantiation *definition* placed in a
// header, so it is repeated in every translation unit that includes this
// file. Confirm that is intended; the usual split is `extern template` in the
// header and a single definition in one source file.
template class IndexingPolicyTemplate<BTBTagType>;
class BTBSetAssociative : public BTBIndexingPolicy
{
public:
PARAMS(BTBSetAssociative);
using KeyType = BTBTagType::KeyType;
BTBSetAssociative(const Params &p)
: BTBIndexingPolicy(p, p.num_entries, p.set_shift),
tagMask(mask(p.tag_bits))
{
setNumThreads(p.numThreads);
}
protected:
/**
* Extract the set index for the instruction PC based on tid.
*/
uint32_t
extractSet(const KeyType &key) const
{
return ((key.address >> setShift)
^ (key.tid << (tagShift - setShift - log2NumThreads)))
& setMask;
}
public:
/**
* Find all possible entries for insertion and replacement of an address.
*/
std::vector<ReplaceableEntry*>
getPossibleEntries(const KeyType &key) const override
{
auto set_idx = extractSet(key);
assert(set_idx < sets.size());
return sets[set_idx];
}
/**
* Set number of threads sharing the BTB
*/
void
setNumThreads(unsigned num_threads)
{
log2NumThreads = log2i(num_threads);
}
/**
* Generate the tag from the given address.
*/
Addr
extractTag(const Addr addr) const override
{
return (addr >> tagShift) & tagMask;
}
Addr regenerateAddr(const KeyType &key,
const ReplaceableEntry* entry) const override
{
panic("Not implemented!");
return 0;
}
private:
const uint64_t tagMask;
unsigned log2NumThreads;
};
namespace branch_prediction
{
class BTBEntry : public ReplaceableEntry
{
public:
using IndexingPolicy = gem5::BTBIndexingPolicy;
using KeyType = gem5::BTBTagType::KeyType;
using TagExtractor = std::function<Addr(Addr)>;
/** Default constructor */
BTBEntry(TagExtractor ext)
: inst(nullptr), extractTag(ext), valid(false), tag({MaxAddr, -1})
{}
/** Update the target and instruction in the BTB entry.
* During insertion, only the tag (key) is updated.
*/
void
update(const PCStateBase &_target,
StaticInstPtr _inst)
{
set(target, _target);
inst = _inst;
}
/**
* Checks if the given tag information corresponds to this entry's.
*/
bool
match(const KeyType &key) const
{
return isValid() && (tag.address == extractTag(key.address))
&& (tag.tid == key.tid);
}
/**
* Insert the block by assigning it a tag and marking it valid. Touches
* block if it hadn't been touched previously.
*/
void
insert(const KeyType &key)
{
setValid();
setTag({extractTag(key.address), key.tid});
}
/** Copy constructor */
BTBEntry(const BTBEntry &other)
{
valid = other.valid;
tag = other.tag;
inst = other.inst;
extractTag = other.extractTag;
set(target, other.target);
}
/** Assignment operator */
BTBEntry& operator=(const BTBEntry &other)
{
valid = other.valid;
tag = other.tag;
inst = other.inst;
extractTag = other.extractTag;
set(target, other.target);
return *this;
}
/**
* Checks if the entry is valid.
*/
bool isValid() const { return valid; }
/**
* Get tag associated to this block.
*/
KeyType getTag() const { return tag; }
/** Invalidate the block. Its contents are no longer valid. */
void
invalidate()
{
valid = false;
setTag({MaxAddr, -1});
}
/** The entry's target. */
std::unique_ptr<PCStateBase> target;
/** Pointer to the static branch inst at this address */
StaticInstPtr inst;
std::string
print() const override
{
return csprintf("tag: %#x tid: %d valid: %d | %s", tag.address, tag.tid,
isValid(), ReplaceableEntry::print());
}
protected:
/**
* Set tag associated to this block.
*/
void setTag(KeyType _tag) { tag = _tag; }
/** Set valid bit. The block must be invalid beforehand. */
void
setValid()
{
assert(!isValid());
valid = true;
}
private:
/** Callback used to extract the tag from the entry */
TagExtractor extractTag;
/**
* Valid bit. The contents of this entry are only valid if this bit is set.
* @sa invalidate()
* @sa insert()
*/
bool valid;
/** The entry's tag. */
KeyType tag;
};
} // namespace gem5::branch_prediction
/**
 * Build a function object that extracts the tag of an address by calling
 * extractTag() on the given BTB indexing policy.
 *
 * This "decouples" indexing from tagging: an entry can call the functor
 * without holding a pointer to the indexing policy itself. The policy
 * pointer is captured by value, so the policy object must stay alive for as
 * long as the functor is used.
 *
 * @param ip Indexing policy that performs the tag extraction.
 * @return A callable taking an Addr and returning its tag.
 */
static constexpr auto
genTagExtractor(BTBIndexingPolicy *ip)
{
    auto extractor = [ip] (Addr address) { return ip->extractTag(address); };
    return extractor;
}
}
#endif //__CPU_PRED_BTB_ENTRY_HH__

View File

@@ -44,84 +44,38 @@
#include "base/trace.hh"
#include "debug/BTB.hh"
namespace gem5
{
namespace branch_prediction
namespace gem5::branch_prediction
{
SimpleBTB::SimpleBTB(const SimpleBTBParams &p)
: BranchTargetBuffer(p),
numEntries(p.numEntries),
tagBits(p.tagBits),
instShiftAmt(p.instShiftAmt),
log2NumThreads(floorLog2(p.numThreads))
btb("simpleBTB", p.numEntries, p.associativity,
p.btbReplPolicy, p.btbIndexingPolicy,
BTBEntry(genTagExtractor(p.btbIndexingPolicy)))
{
DPRINTF(BTB, "BTB: Creating BTB object.\n");
if (!isPowerOf2(numEntries)) {
if (!isPowerOf2(p.numEntries)) {
fatal("BTB entries is not a power of 2!");
}
btb.resize(numEntries);
for (unsigned i = 0; i < numEntries; ++i) {
btb[i].valid = false;
}
idxMask = numEntries - 1;
tagMask = (1 << tagBits) - 1;
tagShiftAmt = instShiftAmt + floorLog2(numEntries);
}
void
SimpleBTB::memInvalidate()
{
for (unsigned i = 0; i < numEntries; ++i) {
btb[i].valid = false;
}
btb.clear();
}
inline
unsigned
SimpleBTB::getIndex(Addr instPC, ThreadID tid)
{
// Need to shift PC over by the word offset.
return ((instPC >> instShiftAmt)
^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads)))
& idxMask;
}
inline
Addr
SimpleBTB::getTag(Addr instPC)
{
return (instPC >> tagShiftAmt) & tagMask;
}
SimpleBTB::BTBEntry *
BTBEntry *
SimpleBTB::findEntry(Addr instPC, ThreadID tid)
{
unsigned btb_idx = getIndex(instPC, tid);
Addr inst_tag = getTag(instPC);
assert(btb_idx < numEntries);
if (btb[btb_idx].valid
&& inst_tag == btb[btb_idx].tag
&& btb[btb_idx].tid == tid) {
return &btb[btb_idx];
}
return nullptr;
return btb.findEntry({instPC, tid});
}
bool
SimpleBTB::valid(ThreadID tid, Addr instPC)
{
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.findEntry({instPC, tid});
return entry != nullptr;
}
@@ -134,11 +88,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
{
stats.lookups[type]++;
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.accessEntry({instPC, tid});
if (entry) {
return entry->target.get();
}
stats.misses[type]++;
return nullptr;
}
@@ -146,31 +101,27 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type)
const StaticInstPtr
SimpleBTB::getInst(ThreadID tid, Addr instPC)
{
BTBEntry *entry = findEntry(instPC, tid);
BTBEntry *entry = btb.findEntry({instPC, tid});
if (entry) {
return entry->inst;
}
return nullptr;
}
void
SimpleBTB::update(ThreadID tid, Addr instPC,
const PCStateBase &target,
BranchType type, StaticInstPtr inst)
const PCStateBase &target,
BranchType type, StaticInstPtr inst)
{
unsigned btb_idx = getIndex(instPC, tid);
assert(btb_idx < numEntries);
stats.updates[type]++;
btb[btb_idx].tid = tid;
btb[btb_idx].valid = true;
set(btb[btb_idx].target, target);
btb[btb_idx].tag = getTag(instPC);
btb[btb_idx].inst = inst;
BTBEntry *victim = btb.findVictim({instPC, tid});
btb.insertEntry({instPC, tid}, victim);
victim->update(target, inst);
}
} // namespace branch_prediction
} // namespace gem5
} // namespace gem5::branch_prediction

View File

@@ -41,15 +41,16 @@
#ifndef __CPU_PRED_SIMPLE_BTB_HH__
#define __CPU_PRED_SIMPLE_BTB_HH__
#include "base/cache/associative_cache.hh"
#include "base/logging.hh"
#include "base/types.hh"
#include "cpu/pred/btb.hh"
#include "cpu/pred/btb_entry.hh"
#include "mem/cache/replacement_policies/replaceable_entry.hh"
#include "mem/cache/tags/indexing_policies/base.hh"
#include "params/SimpleBTB.hh"
namespace gem5
{
namespace branch_prediction
namespace gem5::branch_prediction
{
class SimpleBTB : public BranchTargetBuffer
@@ -60,44 +61,13 @@ class SimpleBTB : public BranchTargetBuffer
void memInvalidate() override;
bool valid(ThreadID tid, Addr instPC) override;
const PCStateBase *lookup(ThreadID tid, Addr instPC,
BranchType type = BranchType::NoBranch) override;
BranchType type = BranchType::NoBranch) override;
void update(ThreadID tid, Addr instPC, const PCStateBase &target_pc,
BranchType type = BranchType::NoBranch,
StaticInstPtr inst = nullptr) override;
BranchType type = BranchType::NoBranch,
StaticInstPtr inst = nullptr) override;
const StaticInstPtr getInst(ThreadID tid, Addr instPC) override;
private:
struct BTBEntry
{
/** The entry's tag. */
Addr tag = 0;
/** The entry's target. */
std::unique_ptr<PCStateBase> target;
/** The entry's thread id. */
ThreadID tid;
/** Whether or not the entry is valid. */
bool valid = false;
/** Pointer to the static branch instruction at this address */
StaticInstPtr inst = nullptr;
};
/** Returns the index into the BTB, based on the branch's PC.
* @param inst_PC The branch to look up.
* @return Returns the index into the BTB.
*/
inline unsigned getIndex(Addr instPC, ThreadID tid);
/** Returns the tag bits of a given address.
* @param inst_PC The branch's address.
* @return Returns the tag bits.
*/
inline Addr getTag(Addr instPC);
/** Internal call to find an address in the BTB
* @param instPC The branch's address.
@@ -106,31 +76,9 @@ class SimpleBTB : public BranchTargetBuffer
BTBEntry *findEntry(Addr instPC, ThreadID tid);
/** The actual BTB. */
std::vector<BTBEntry> btb;
/** The number of entries in the BTB. */
unsigned numEntries;
/** The index mask. */
unsigned idxMask;
/** The number of tag bits per entry. */
unsigned tagBits;
/** The tag mask. */
unsigned tagMask;
/** Number of bits to shift PC when calculating index. */
unsigned instShiftAmt;
/** Number of bits to shift PC when calculating tag. */
unsigned tagShiftAmt;
/** Log2 NumThreads used for hashing threadid */
unsigned log2NumThreads;
AssociativeCache<BTBEntry> btb;
};
} // namespace branch_prediction
} // namespace gem5
} // namespace gem5::branch_prediction
#endif // __CPU_PRED_SIMPLE_BTB_HH__