diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py index 826d4e19f4..36aa64eca5 100644 --- a/configs/common/cores/arm/HPI.py +++ b/configs/common/cores/arm/HPI.py @@ -1683,6 +1683,15 @@ class HPI_MMU(ArmMMU): class HPI_BTB(SimpleBTB): numEntries = 128 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) class HPI_BP(TournamentBP): diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py index 45bb391bb1..ee42c3c062 100644 --- a/configs/common/cores/arm/O3_ARM_v7a.py +++ b/configs/common/cores/arm/O3_ARM_v7a.py @@ -111,6 +111,15 @@ class O3_ARM_v7a_FUP(FUPool): class O3_ARM_v7a_BTB(SimpleBTB): numEntries = 2048 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) # Bi-Mode Branch Predictor diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py index f3b55fd3a8..8ea04aa5f7 100644 --- a/configs/common/cores/arm/ex5_big.py +++ b/configs/common/cores/arm/ex5_big.py @@ -108,6 +108,15 @@ class ex5_big_FUP(FUPool): class ex5_big_BTB(SimpleBTB): numEntries = 4096 tagBits = 18 + associativity = 1 + instShiftAmt = 2 + btbReplPolicy = LRURP() + btbIndexingPolicy = BTBSetAssociative( + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + assoc=Parent.associativity, + tag_bits=Parent.tagBits, + ) # Bi-Mode Branch Predictor diff --git a/src/cpu/pred/BranchPredictor.py b/src/cpu/pred/BranchPredictor.py index a10b2c2cef..5b90826315 100644 --- a/src/cpu/pred/BranchPredictor.py +++ b/src/cpu/pred/BranchPredictor.py @@ -38,6 +38,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from m5.objects.ClockedObject import ClockedObject +from m5.objects.IndexingPolicies import * +from m5.objects.ReplacementPolicies import * from m5.params import * from m5.proxy import * from m5.SimObject import * @@ -83,6 +85,38 @@ class BranchTargetBuffer(ClockedObject): numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") +class BTBIndexingPolicy(SimObject): + type = "BTBIndexingPolicy" + abstract = True + cxx_class = "gem5::IndexingPolicyTemplate" + cxx_header = "cpu/pred/btb_entry.hh" + cxx_template_params = ["class Types"] + + # Get the associativity + assoc = Param.Int(Parent.assoc, "associativity") + + +class BTBSetAssociative(BTBIndexingPolicy): + type = "BTBSetAssociative" + cxx_class = "gem5::BTBSetAssociative" + cxx_header = "cpu/pred/btb_entry.hh" + + # Get the number of entries in the BTB from the parent + num_entries = Param.Unsigned( + Parent.numEntries, "Number of entries in the BTB" + ) + + # Set shift for the index. Ignore lower 2 bits for a 4 byte instruction. + set_shift = Param.Unsigned(2, "Number of bits to shift PC to get index") + + # Total number of bits in the tag. + # This is above the index and offset bit + tag_bits = Param.Unsigned(64, "number of bits in the tag") + + # Number of threads sharing the BTB + numThreads = Param.Unsigned(Parent.numThreads, "Number of threads") + + class SimpleBTB(BranchTargetBuffer): type = "SimpleBTB" cxx_class = "gem5::branch_prediction::SimpleBTB" @@ -93,6 +127,19 @@ class SimpleBTB(BranchTargetBuffer): instShiftAmt = Param.Unsigned( Parent.instShiftAmt, "Number of bits to shift instructions by" ) + associativity = Param.Unsigned(1, "BTB associativity") + btbReplPolicy = Param.BaseReplacementPolicy( + LRURP(), "BTB replacement policy" + ) + btbIndexingPolicy = Param.BTBIndexingPolicy( + BTBSetAssociative( + assoc=Parent.associativity, + num_entries=Parent.numEntries, + set_shift=Parent.instShiftAmt, + numThreads=1, + ), + "BTB indexing policy", + ) class IndirectPredictor(SimObject): diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript index ec3102cada..6c03dd8a1b 100644 --- a/src/cpu/pred/SConscript +++ b/src/cpu/pred/SConscript @@ -45,7 +45,7 @@ SimObject('BranchPredictor.py', sim_objects=[ 'BranchPredictor', 'IndirectPredictor', 'SimpleIndirectPredictor', - 'BranchTargetBuffer', 'SimpleBTB', + 'BranchTargetBuffer', 'SimpleBTB', 'BTBIndexingPolicy', 'BTBSetAssociative', 'ReturnAddrStack', 'LocalBP', 'TournamentBP', 'BiModeBP', 'TAGEBase', 'TAGE', 'LoopPredictor', 'TAGE_SC_L_TAGE', 'TAGE_SC_L_TAGE_64KB', 'TAGE_SC_L_TAGE_8KB', diff --git a/src/cpu/pred/btb_entry.hh b/src/cpu/pred/btb_entry.hh new file mode 100644 index 0000000000..a445ac4775 --- /dev/null +++ b/src/cpu/pred/btb_entry.hh @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2024 Pranith Kumar + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Declaration of a BTB entry and BTB indexing policy. + */ + +#ifndef __CPU_PRED_BTB_ENTRY_HH__ +#define __CPU_PRED_BTB_ENTRY_HH__ + +#include + +#include "arch/generic/pcstate.hh" +#include "base/intmath.hh" +#include "base/types.hh" +#include "cpu/static_inst.hh" +#include "mem/cache/replacement_policies/replaceable_entry.hh" +#include "mem/cache/tags/indexing_policies/base.hh" +#include "params/BTBIndexingPolicy.hh" +#include "params/BTBSetAssociative.hh" + +namespace gem5 { + +class BTBTagType +{ + public: + struct KeyType + { + Addr address; + ThreadID tid; + }; + using Params = BTBIndexingPolicyParams; +}; + +using BTBIndexingPolicy = IndexingPolicyTemplate; +template class IndexingPolicyTemplate; + +class BTBSetAssociative : public BTBIndexingPolicy +{ + public: + PARAMS(BTBSetAssociative); + using KeyType = BTBTagType::KeyType; + + BTBSetAssociative(const Params &p) + : BTBIndexingPolicy(p, p.num_entries, p.set_shift), + tagMask(mask(p.tag_bits)) + { + setNumThreads(p.numThreads); + } + + protected: + /** + * Extract the set index for the instruction PC based on tid. + */ + uint32_t + extractSet(const KeyType &key) const + { + return ((key.address >> setShift) + ^ (key.tid << (tagShift - setShift - log2NumThreads))) + & setMask; + } + + public: + /** + * Find all possible entries for insertion and replacement of an address. + */ + std::vector + getPossibleEntries(const KeyType &key) const override + { + auto set_idx = extractSet(key); + + assert(set_idx < sets.size()); + + return sets[set_idx]; + } + + /** + * Set number of threads sharing the BTB + */ + void + setNumThreads(unsigned num_threads) + { + log2NumThreads = log2i(num_threads); + } + + /** + * Generate the tag from the given address. + */ + Addr + extractTag(const Addr addr) const override + { + return (addr >> tagShift) & tagMask; + } + + Addr regenerateAddr(const KeyType &key, + const ReplaceableEntry* entry) const override + { + panic("Not implemented!"); + return 0; + } + + private: + const uint64_t tagMask; + unsigned log2NumThreads; +}; + +namespace branch_prediction +{ + +class BTBEntry : public ReplaceableEntry +{ + public: + using IndexingPolicy = gem5::BTBIndexingPolicy; + using KeyType = gem5::BTBTagType::KeyType; + using TagExtractor = std::function; + + /** Default constructor */ + BTBEntry(TagExtractor ext) + : inst(nullptr), extractTag(ext), valid(false), tag({MaxAddr, -1}) + {} + + /** Update the target and instruction in the BTB entry. + * During insertion, only the tag (key) is updated. + */ + void + update(const PCStateBase &_target, + StaticInstPtr _inst) + { + set(target, _target); + inst = _inst; + } + + /** + * Checks if the given tag information corresponds to this entry's. + */ + bool + match(const KeyType &key) const + { + return isValid() && (tag.address == extractTag(key.address)) + && (tag.tid == key.tid); + } + + /** + * Insert the block by assigning it a tag and marking it valid. Touches + * block if it hadn't been touched previously. + */ + void + insert(const KeyType &key) + { + setValid(); + setTag({extractTag(key.address), key.tid}); + } + + /** Copy constructor */ + BTBEntry(const BTBEntry &other) + { + valid = other.valid; + tag = other.tag; + inst = other.inst; + extractTag = other.extractTag; + set(target, other.target); + } + + /** Assignment operator */ + BTBEntry& operator=(const BTBEntry &other) + { + valid = other.valid; + tag = other.tag; + inst = other.inst; + extractTag = other.extractTag; + set(target, other.target); + + return *this; + } + + /** + * Checks if the entry is valid. + */ + bool isValid() const { return valid; } + + /** + * Get tag associated to this block. + */ + KeyType getTag() const { return tag; } + + /** Invalidate the block. Its contents are no longer valid. */ + void + invalidate() + { + valid = false; + setTag({MaxAddr, -1}); + } + + /** The entry's target. */ + std::unique_ptr target; + + /** Pointer to the static branch inst at this address */ + StaticInstPtr inst; + + std::string + print() const override + { + return csprintf("tag: %#x tid: %d valid: %d | %s", tag.address, tag.tid, + isValid(), ReplaceableEntry::print()); + } + + protected: + /** + * Set tag associated to this block. + */ + void setTag(KeyType _tag) { tag = _tag; } + + /** Set valid bit. The block must be invalid beforehand. */ + void + setValid() + { + assert(!isValid()); + valid = true; + } + + private: + /** Callback used to extract the tag from the entry */ + TagExtractor extractTag; + + /** + * Valid bit. The contents of this entry are only valid if this bit is set. + * @sa invalidate() + * @sa insert() + */ + bool valid; + + /** The entry's tag. */ + KeyType tag; +}; + +} // namespace gem5::branch_prediction +/** + * This helper generates a tag extractor function object + * which will be typically used by Replaceable entries indexed + * with the BaseIndexingPolicy. + * It allows to "decouple" indexing from tagging. Those entries + * would call the functor without directly holding a pointer + * to the indexing policy which should reside in the cache. + */ +static constexpr auto +genTagExtractor(BTBIndexingPolicy *ip) +{ + return [ip] (Addr addr) { return ip->extractTag(addr); }; +} + +} + +#endif //__CPU_PRED_BTB_ENTRY_HH__ diff --git a/src/cpu/pred/simple_btb.cc b/src/cpu/pred/simple_btb.cc index c78caac7a8..0260ced8b3 100644 --- a/src/cpu/pred/simple_btb.cc +++ b/src/cpu/pred/simple_btb.cc @@ -44,84 +44,38 @@ #include "base/trace.hh" #include "debug/BTB.hh" -namespace gem5 -{ - -namespace branch_prediction +namespace gem5::branch_prediction { SimpleBTB::SimpleBTB(const SimpleBTBParams &p) : BranchTargetBuffer(p), - numEntries(p.numEntries), - tagBits(p.tagBits), - instShiftAmt(p.instShiftAmt), - log2NumThreads(floorLog2(p.numThreads)) + btb("simpleBTB", p.numEntries, p.associativity, + p.btbReplPolicy, p.btbIndexingPolicy, + BTBEntry(genTagExtractor(p.btbIndexingPolicy))) { DPRINTF(BTB, "BTB: Creating BTB object.\n"); - if (!isPowerOf2(numEntries)) { + if (!isPowerOf2(p.numEntries)) { fatal("BTB entries is not a power of 2!"); } - - btb.resize(numEntries); - - for (unsigned i = 0; i < numEntries; ++i) { - btb[i].valid = false; - } - - idxMask = numEntries - 1; - - tagMask = (1 << tagBits) - 1; - - tagShiftAmt = instShiftAmt + floorLog2(numEntries); } void SimpleBTB::memInvalidate() { - for (unsigned i = 0; i < numEntries; ++i) { - btb[i].valid = false; - } + btb.clear(); } -inline -unsigned -SimpleBTB::getIndex(Addr instPC, ThreadID tid) -{ - // Need to shift PC over by the word offset. - return ((instPC >> instShiftAmt) - ^ (tid << (tagShiftAmt - instShiftAmt - log2NumThreads))) - & idxMask; -} - -inline -Addr -SimpleBTB::getTag(Addr instPC) -{ - return (instPC >> tagShiftAmt) & tagMask; -} - -SimpleBTB::BTBEntry * +BTBEntry * SimpleBTB::findEntry(Addr instPC, ThreadID tid) { - unsigned btb_idx = getIndex(instPC, tid); - Addr inst_tag = getTag(instPC); - - assert(btb_idx < numEntries); - - if (btb[btb_idx].valid - && inst_tag == btb[btb_idx].tag - && btb[btb_idx].tid == tid) { - return &btb[btb_idx]; - } - - return nullptr; + return btb.findEntry({instPC, tid}); } bool SimpleBTB::valid(ThreadID tid, Addr instPC) { - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.findEntry({instPC, tid}); return entry != nullptr; } @@ -134,11 +88,12 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type) { stats.lookups[type]++; - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.accessEntry({instPC, tid}); if (entry) { return entry->target.get(); } + stats.misses[type]++; return nullptr; } @@ -146,31 +101,27 @@ SimpleBTB::lookup(ThreadID tid, Addr instPC, BranchType type) const StaticInstPtr SimpleBTB::getInst(ThreadID tid, Addr instPC) { - BTBEntry *entry = findEntry(instPC, tid); + BTBEntry *entry = btb.findEntry({instPC, tid}); if (entry) { return entry->inst; } + return nullptr; } void SimpleBTB::update(ThreadID tid, Addr instPC, - const PCStateBase &target, - BranchType type, StaticInstPtr inst) + const PCStateBase &target, + BranchType type, StaticInstPtr inst) { - unsigned btb_idx = getIndex(instPC, tid); - - assert(btb_idx < numEntries); - stats.updates[type]++; - btb[btb_idx].tid = tid; - btb[btb_idx].valid = true; - set(btb[btb_idx].target, target); - btb[btb_idx].tag = getTag(instPC); - btb[btb_idx].inst = inst; + BTBEntry *victim = btb.findVictim({instPC, tid}); + + btb.insertEntry({instPC, tid}, victim); + victim->update(target, inst); } -} // namespace branch_prediction -} // namespace gem5 + +} // namespace gem5::branch_prediction diff --git a/src/cpu/pred/simple_btb.hh b/src/cpu/pred/simple_btb.hh index 3c76890348..b1ef2a9fa5 100644 --- a/src/cpu/pred/simple_btb.hh +++ b/src/cpu/pred/simple_btb.hh @@ -41,15 +41,16 @@ #ifndef __CPU_PRED_SIMPLE_BTB_HH__ #define __CPU_PRED_SIMPLE_BTB_HH__ +#include "base/cache/associative_cache.hh" #include "base/logging.hh" #include "base/types.hh" #include "cpu/pred/btb.hh" +#include "cpu/pred/btb_entry.hh" +#include "mem/cache/replacement_policies/replaceable_entry.hh" +#include "mem/cache/tags/indexing_policies/base.hh" #include "params/SimpleBTB.hh" -namespace gem5 -{ - -namespace branch_prediction +namespace gem5::branch_prediction { class SimpleBTB : public BranchTargetBuffer @@ -60,44 +61,13 @@ class SimpleBTB : public BranchTargetBuffer void memInvalidate() override; bool valid(ThreadID tid, Addr instPC) override; const PCStateBase *lookup(ThreadID tid, Addr instPC, - BranchType type = BranchType::NoBranch) override; + BranchType type = BranchType::NoBranch) override; void update(ThreadID tid, Addr instPC, const PCStateBase &target_pc, - BranchType type = BranchType::NoBranch, - StaticInstPtr inst = nullptr) override; + BranchType type = BranchType::NoBranch, + StaticInstPtr inst = nullptr) override; const StaticInstPtr getInst(ThreadID tid, Addr instPC) override; - private: - struct BTBEntry - { - /** The entry's tag. */ - Addr tag = 0; - - /** The entry's target. */ - std::unique_ptr target; - - /** The entry's thread id. */ - ThreadID tid; - - /** Whether or not the entry is valid. */ - bool valid = false; - - /** Pointer to the static branch instruction at this address */ - StaticInstPtr inst = nullptr; - }; - - - /** Returns the index into the BTB, based on the branch's PC. - * @param inst_PC The branch to look up. - * @return Returns the index into the BTB. - */ - inline unsigned getIndex(Addr instPC, ThreadID tid); - - /** Returns the tag bits of a given address. - * @param inst_PC The branch's address. - * @return Returns the tag bits. - */ - inline Addr getTag(Addr instPC); /** Internal call to find an address in the BTB * @param instPC The branch's address. @@ -106,31 +76,9 @@ class SimpleBTB : public BranchTargetBuffer BTBEntry *findEntry(Addr instPC, ThreadID tid); /** The actual BTB. */ - std::vector btb; - - /** The number of entries in the BTB. */ - unsigned numEntries; - - /** The index mask. */ - unsigned idxMask; - - /** The number of tag bits per entry. */ - unsigned tagBits; - - /** The tag mask. */ - unsigned tagMask; - - /** Number of bits to shift PC when calculating index. */ - unsigned instShiftAmt; - - /** Number of bits to shift PC when calculating tag. */ - unsigned tagShiftAmt; - - /** Log2 NumThreads used for hashing threadid */ - unsigned log2NumThreads; + AssociativeCache btb; }; -} // namespace branch_prediction -} // namespace gem5 +} // namespace gem5::branch_prediction #endif // __CPU_PRED_SIMPLE_BTB_HH__