mem-cache: A Best-Offset Prefetcher

Michaud, P. (2015, June). A best-offset prefetcher.
In 2nd Data Prefetching Championship.

Change-Id: I61bb89ca5639356d54aeb04e856d5bf6e8805c22
Reviewed-on: https://gem5-review.googlesource.com/c/14820
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
This commit is contained in:
Ivan Pizarro
2018-12-03 23:03:01 +01:00
parent f4d3080f45
commit d63f735663
8 changed files with 465 additions and 3 deletions

View File

@@ -478,6 +478,7 @@ BaseCache::recvTimingResp(PacketPtr pkt)
writeAllocator->allocate() : mshr->allocOnFill();
blk = handleFill(pkt, blk, writebacks, allocate);
assert(blk != nullptr);
ppFill->notify(pkt);
}
if (blk && blk->isValid() && pkt->isClean() && !pkt->isInvalidate()) {
@@ -2223,6 +2224,7 @@ BaseCache::regProbePoints()
{
ppHit = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Hit");
ppMiss = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Miss");
ppFill = new ProbePointArg<PacketPtr>(this->getProbeManager(), "Fill");
}
///////////////

View File

@@ -331,6 +331,9 @@ class BaseCache : public MemObject
/** To probe when a cache miss occurs */
ProbePointArg<PacketPtr> *ppMiss;
/** To probe when a cache fill occurs */
ProbePointArg<PacketPtr> *ppFill;
/**
* The writeAllocator drive optimizations for streaming writes.
* It first determines whether a WriteReq MSHR should be delayed,

View File

@@ -341,3 +341,25 @@ class SlimAMPMPrefetcher(QueuedPrefetcher):
dcpt = Param.DeltaCorrelatingPredictionTables(
SlimDeltaCorrelatingPredictionTables(),
"Delta Correlating Prediction Tables object")
class BOPPrefetcher(QueuedPrefetcher):
type = "BOPPrefetcher"
cxx_class = "BOPPrefetcher"
cxx_header = "mem/cache/prefetch/bop.hh"
score_max = Param.Unsigned(31, "Max. score to update the best offset")
round_max = Param.Unsigned(100, "Max. round to update the best offset")
bad_score = Param.Unsigned(10, "Score at which the HWP is disabled")
rr_size = Param.Unsigned(64, "Number of entries of each RR bank")
tag_bits = Param.Unsigned(12, "Bits used to store the tag")
offset_list_size = Param.Unsigned(46,
"Number of entries in the offsets list")
negative_offsets_enable = Param.Bool(True,
"Initialize the offsets list also with negative values \
(i.e. the table will have half of the entries with positive \
offsets and the other half with negative ones)")
delay_queue_enable = Param.Bool(True, "Enable the delay queue")
delay_queue_size = Param.Unsigned(15,
"Number of entries in the delay queue")
delay_queue_cycles = Param.Cycles(60,
"Cycles to delay a write in the left RR table from the delay \
queue")

View File

@@ -34,6 +34,7 @@ SimObject('Prefetcher.py')
Source('access_map_pattern_matching.cc')
Source('base.cc')
Source('bop.cc')
Source('delta_correlating_prediction_tables.cc')
Source('irregular_stream_buffer.cc')
Source('queued.cc')

View File

@@ -72,7 +72,11 @@ BasePrefetcher::PrefetchInfo::PrefetchInfo(PrefetchInfo const &pfi, Addr addr)
void
BasePrefetcher::PrefetchListener::notify(const PacketPtr &pkt)
{
parent.probeNotify(pkt);
if (isFill) {
parent.notifyFill(pkt);
} else {
parent.probeNotify(pkt);
}
}
BasePrefetcher::BasePrefetcher(const BasePrefetcherParams *p)
@@ -224,6 +228,7 @@ BasePrefetcher::regProbeListeners()
if (listeners.empty() && cache != nullptr) {
ProbeManager *pm(cache->getProbeManager());
listeners.push_back(new PrefetchListener(*this, pm, "Miss"));
listeners.push_back(new PrefetchListener(*this, pm, "Fill", true));
if (prefetchOnAccess) {
listeners.push_back(new PrefetchListener(*this, pm, "Hit"));
}

View File

@@ -67,12 +67,13 @@ class BasePrefetcher : public ClockedObject
{
public:
PrefetchListener(BasePrefetcher &_parent, ProbeManager *pm,
const std::string &name)
const std::string &name, bool _isFill = false)
: ProbeListenerArgBase(pm, name),
parent(_parent) {}
parent(_parent), isFill(_isFill) {}
void notify(const PacketPtr &pkt) override;
protected:
BasePrefetcher &parent;
bool isFill;
};
std::vector<PrefetchListener *> listeners;
@@ -253,6 +254,10 @@ class BasePrefetcher : public ClockedObject
*/
virtual void notify(const PacketPtr &pkt, const PrefetchInfo &pfi) = 0;
/** Notify prefetcher of cache fill */
virtual void notifyFill(const PacketPtr &pkt)
{}
virtual PacketPtr getPacket() = 0;
virtual Tick nextPrefetchReadyTime() const = 0;

267
src/mem/cache/prefetch/bop.cc vendored Normal file
View File

@@ -0,0 +1,267 @@
/**
* Copyright (c) 2018 Metempsy Technology Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Ivan Pizarro
*/
#include "mem/cache/prefetch/bop.hh"
#include "debug/HWPrefetch.hh"
#include "params/BOPPrefetcher.hh"
BOPPrefetcher::BOPPrefetcher(const BOPPrefetcherParams *p)
: QueuedPrefetcher(p),
scoreMax(p->score_max), roundMax(p->round_max),
badScore(p->bad_score), rrEntries(p->rr_size),
tagMask((1 << p->tag_bits) - 1),
delayQueueEnabled(p->delay_queue_enable),
delayQueueSize(p->delay_queue_size),
delayTicks(cyclesToTicks(p->delay_queue_cycles)),
delayQueueEvent([this]{ delayQueueEventWrapper(); }, name()),
issuePrefetchRequests(false), bestOffset(1), phaseBestOffset(0),
bestScore(0), round(0)
{
if (!isPowerOf2(rrEntries)) {
fatal("%s: number of RR entries is not power of 2\n", name());
}
if (!isPowerOf2(blkSize)) {
fatal("%s: cache line size is not power of 2\n", name());
}
if (!(p->negative_offsets_enable && (p->offset_list_size % 2 == 0))) {
fatal("%s: negative offsets enabled with odd offset list size\n",
name());
}
rrLeft.resize(rrEntries);
rrRight.resize(rrEntries);
// Following the paper implementation, a list with the specified number
// of offsets which are of the form 2^i * 3^j * 5^k with i,j,k >= 0
const int factors[] = { 2, 3, 5 };
unsigned int i = 0;
int64_t offset_i = 1;
while (i < p->offset_list_size)
{
int64_t offset = offset_i;
for (int n : factors) {
while ((offset % n) == 0) {
offset /= n;
}
}
if (offset == 1) {
offsetsList.push_back(OffsetListEntry(offset_i, 0));
i++;
// If we want to use negative offsets, add also the negative value
// of the offset just calculated
if (p->negative_offsets_enable) {
offsetsList.push_back(OffsetListEntry(-offset_i, 0));
i++;
}
}
offset_i++;
}
offsetsListIterator = offsetsList.begin();
}
void
BOPPrefetcher::delayQueueEventWrapper()
{
while (!delayQueue.empty() &&
delayQueue.front().processTick <= curTick())
{
Addr addr_x = delayQueue.front().baseAddr;
insertIntoRR(addr_x, RRWay::Left);
delayQueue.pop_front();
}
// Schedule an event for the next element if there is one
if (!delayQueue.empty()) {
schedule(delayQueueEvent, delayQueue.front().processTick);
}
}
unsigned int
BOPPrefetcher::hash(Addr addr, unsigned int way) const
{
Addr hash1 = addr >> way;
Addr hash2 = hash1 >> floorLog2(rrEntries);
return (hash1 ^ hash2) & (Addr)(rrEntries - 1);
}
void
BOPPrefetcher::insertIntoRR(Addr addr, unsigned int way)
{
switch (way) {
case RRWay::Left:
rrLeft[hash(addr, RRWay::Left)] = addr;
break;
case RRWay::Right:
rrRight[hash(addr, RRWay::Right)] = addr;
break;
}
}
void
BOPPrefetcher::insertIntoDelayQueue(Addr x)
{
if (delayQueue.size() == delayQueueSize) {
return;
}
// Add the address to the delay queue and schedule an event to process
// it after the specified delay cycles
Tick process_tick = curTick() + delayTicks;
delayQueue.push_back(DelayQueueEntry(x, process_tick));
if (!delayQueueEvent.scheduled()) {
schedule(delayQueueEvent, process_tick);
}
}
void
BOPPrefetcher::resetScores()
{
for (auto& it : offsetsList) {
it.second = 0;
}
}
inline Addr
BOPPrefetcher::tag(Addr addr) const
{
return (addr >> blkSize) & tagMask;
}
bool
BOPPrefetcher::testRR(Addr addr) const
{
for (auto& it : rrLeft) {
if (it == addr) {
return true;
}
}
for (auto& it : rrRight) {
if (it == addr) {
return true;
}
}
return false;
}
void
BOPPrefetcher::bestOffsetLearning(Addr x)
{
Addr offset_addr = (*offsetsListIterator).first;
Addr lookup_addr = x - offset_addr;
// There was a hit in the RR table, increment the score for this offset
if (testRR(lookup_addr)) {
DPRINTF(HWPrefetch, "Address %#lx found in the RR table\n", x);
(*offsetsListIterator).second++;
if ((*offsetsListIterator).second > bestScore) {
bestScore = (*offsetsListIterator).second;
phaseBestOffset = (*offsetsListIterator).first;
DPRINTF(HWPrefetch, "New best score is %lu\n", bestScore);
}
}
offsetsListIterator++;
// All the offsets in the list were visited meaning that a learning
// phase finished. Check if
if (offsetsListIterator == offsetsList.end()) {
offsetsListIterator = offsetsList.begin();
round++;
// Check if the best offset must be updated if:
// (1) One of the scores equals SCORE_MAX
// (2) The number of rounds equals ROUND_MAX
if ((bestScore >= scoreMax) || (round == roundMax)) {
bestOffset = phaseBestOffset;
round = 0;
bestScore = 0;
phaseBestOffset = 0;
resetScores();
issuePrefetchRequests = true;
} else if (phaseBestOffset <= badScore) {
issuePrefetchRequests = false;
}
}
}
void
BOPPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses)
{
Addr addr = pfi.getAddr();
Addr tag_x = tag(addr);
if (delayQueueEnabled) {
insertIntoDelayQueue(tag_x);
} else {
insertIntoRR(tag_x, RRWay::Left);
}
// Go through the nth offset and update the score, the best score and the
// current best offset if a better one is found
bestOffsetLearning(tag_x);
// This prefetcher is a degree 1 prefetch, so it will only generate one
// prefetch at most per access
if (issuePrefetchRequests) {
Addr prefetch_addr = addr + (bestOffset << lBlkSize);
addresses.push_back(AddrPriority(prefetch_addr, 0));
DPRINTF(HWPrefetch, "Generated prefetch %#lx\n", prefetch_addr);
}
}
void
BOPPrefetcher::notifyFill(const PacketPtr& pkt)
{
// Only insert into the RR right way if it's the pkt is a HWP
if (!pkt->cmd.isHWPrefetch()) return;
Addr tag_y = tag(pkt->getAddr());
if (issuePrefetchRequests) {
insertIntoRR(tag_y - bestOffset, RRWay::Right);
}
}
BOPPrefetcher*
BOPPrefetcherParams::create()
{
return new BOPPrefetcher(this);
}

157
src/mem/cache/prefetch/bop.hh vendored Normal file
View File

@@ -0,0 +1,157 @@
/**
* Copyright (c) 2018 Metempsy Technology Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Ivan Pizarro
*/
/**
* Implementation of the 'A Best-Offset Prefetcher'
* Reference:
* Michaud, P. (2015, June). A best-offset prefetcher.
* In 2nd Data Prefetching Championship.
*/
#ifndef __MEM_CACHE_PREFETCH_BOP_HH__
#define __MEM_CACHE_PREFETCH_BOP_HH__
#include <queue>
#include "mem/cache/prefetch/queued.hh"
#include "mem/packet.hh"
struct BOPPrefetcherParams;
class BOPPrefetcher : public QueuedPrefetcher
{
private:
enum RRWay {
Left,
Right
};
/** Learning phase parameters */
const unsigned int scoreMax;
const unsigned int roundMax;
const unsigned int badScore;
/** Recent requests table parameteres */
const unsigned int rrEntries;
const unsigned int tagMask;
/** Delay queue parameters */
const bool delayQueueEnabled;
const unsigned int delayQueueSize;
const unsigned int delayTicks;
std::vector<Addr> rrLeft;
std::vector<Addr> rrRight;
/** Structure to save the offset and the score */
typedef std::pair<int16_t, uint8_t> OffsetListEntry;
std::vector<OffsetListEntry> offsetsList;
/** In a first implementation of the BO prefetcher, both banks of the
* RR were written simultaneously when a prefetched line is inserted
* into the cache. Adding the delay queue tries to avoid always
* striving for timeless prefetches, which has been found to not
* always being optimal.
*/
struct DelayQueueEntry
{
Addr baseAddr;
Tick processTick;
DelayQueueEntry(Addr x, Tick t) : baseAddr(x), processTick(t)
{}
};
std::deque<DelayQueueEntry> delayQueue;
/** Event to handle the delay queue processing */
void delayQueueEventWrapper();
EventFunctionWrapper delayQueueEvent;
/** Hardware prefetcher enabled */
bool issuePrefetchRequests;
/** Current best offset to issue prefetches */
Addr bestOffset;
/** Current best offset found in the learning phase */
Addr phaseBestOffset;
/** Current test offset index */
std::vector<OffsetListEntry>::iterator offsetsListIterator;
/** Max score found so far */
unsigned int bestScore;
/** Current round */
unsigned int round;
/** Generate a hash for the specified address to index the RR table
* @param addr: address to hash
* @param way: RR table to which is addressed (left/right)
*/
unsigned int hash(Addr addr, unsigned int way) const;
/** Insert the specified address into the RR table
* @param addr: address to insert
* @param way: RR table to which the address will be inserted
*/
void insertIntoRR(Addr addr, unsigned int way);
/** Insert the specified address into the delay queue. This will
* trigger an event after the delay cycles pass
* @param addr: address to insert into the delay queue
*/
void insertIntoDelayQueue(Addr addr);
/** Reset all the scores from the offset list */
void resetScores();
/** Generate the tag for the specified address based on the tag bits
* and the block size
* @param addr: address to get the tag from
*/
Addr tag(Addr addr) const;
/** Test if @X-O is hitting in the RR table to update the
offset score */
bool testRR(Addr) const;
/** Learning phase of the BOP. Update the intermediate values of the
round and update the best offset if found */
void bestOffsetLearning(Addr);
/** Update the RR right table after a prefetch fill */
void notifyFill(const PacketPtr& pkt) override;
public:
BOPPrefetcher(const BOPPrefetcherParams *p);
~BOPPrefetcher() {}
void calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses);
};
#endif /* __MEM_CACHE_PREFETCH_BOP_HH__ */