mem: Rework the structuring of the prefetchers

Re-organizes the prefetcher class structure. Previously the
BasePrefetcher forced multiple assumptions on the prefetchers that
inherited from it. This patch makes the BasePrefetcher class truly
representative of base functionality. For example, the base class no
longer enforces FIFO order. Instead, prefetchers with FIFO requests
(like the existing stride and tagged prefetchers) now inherit from a
new QueuedPrefetcher base class.

Finally, the stride-based prefetcher now assumes a customizable lookup table
(sets/ways) rather than the previous fully associative structure.
This commit is contained in:
Mitch Hayenga
2014-12-23 09:31:18 -05:00
parent 6cb58b2bd2
commit df82a2d003
11 changed files with 598 additions and 486 deletions

View File

@@ -535,7 +535,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
bool satisfied = access(pkt, blk, lat, writebacks);
// track time of availability of next prefetch, if any
Tick next_pf_time = 0;
Tick next_pf_time = MaxTick;
bool needsResponse = pkt->needsResponse();
@@ -548,7 +548,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
// Don't notify on SWPrefetch
if (!pkt->cmd.isSWPrefetch())
next_pf_time = prefetcher->notify(pkt, time);
next_pf_time = prefetcher->notify(pkt);
}
if (needsResponse) {
@@ -648,7 +648,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
if (prefetcher) {
// Don't notify on SWPrefetch
if (!pkt->cmd.isSWPrefetch())
next_pf_time = prefetcher->notify(pkt, time);
next_pf_time = prefetcher->notify(pkt);
}
}
} else {
@@ -688,12 +688,12 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
if (prefetcher) {
// Don't notify on SWPrefetch
if (!pkt->cmd.isSWPrefetch())
next_pf_time = prefetcher->notify(pkt, time);
next_pf_time = prefetcher->notify(pkt);
}
}
}
if (next_pf_time != 0)
if (next_pf_time != MaxTick)
requestMemSideBus(Request_PF, std::max(time, next_pf_time));
// copy writebacks to write buffer

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2012 ARM Limited
# Copyright (c) 2012, 2014 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -37,6 +37,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ron Dreslinski
# Mitch Hayenga
from ClockedObject import ClockedObject
from m5.params import *
@@ -46,39 +47,46 @@ class BasePrefetcher(ClockedObject):
type = 'BasePrefetcher'
abstract = True
cxx_header = "mem/cache/prefetch/base.hh"
size = Param.Int(100,
"Number of entries in the hardware prefetch queue")
cross_pages = Param.Bool(False,
"Allow prefetches to cross virtual page boundaries")
serial_squash = Param.Bool(False,
"Squash prefetches with a later time on a subsequent miss")
degree = Param.Int(1,
"Degree of the prefetch depth")
latency = Param.Cycles('1', "Latency of the prefetcher")
use_master_id = Param.Bool(True,
"Use the master id to separate calculations of prefetches")
data_accesses_only = Param.Bool(False,
"Only prefetch on data not on instruction accesses")
on_miss_only = Param.Bool(False,
"Only prefetch on miss (as opposed to always)")
on_read_only = Param.Bool(False,
"Only prefetch on read requests (write requests ignored)")
on_prefetch = Param.Bool(True,
"Let lower cache prefetcher train on prefetch requests")
inst_tagged = Param.Bool(True,
"Perform a tagged prefetch for instruction fetches always")
sys = Param.System(Parent.any, "System this prefetcher belongs to")
class StridePrefetcher(BasePrefetcher):
on_miss = Param.Bool(False, "Only notify prefetcher on misses")
on_read = Param.Bool(True, "Notify prefetcher on reads")
on_write = Param.Bool(True, "Notify prefetcher on writes")
on_data = Param.Bool(True, "Notify prefetcher on data accesses")
on_inst = Param.Bool(True, "Notify prefetcher on instruction accesses")
class QueuedPrefetcher(BasePrefetcher):
type = "QueuedPrefetcher"
abstract = True
cxx_class = "QueuedPrefetcher"
cxx_header = "mem/cache/prefetch/queued.hh"
latency = Param.Int(1, "Latency for generated prefetches")
queue_size = Param.Int(32, "Maximum number of queued prefetches")
queue_squash = Param.Bool(True, "Squash queued prefetch on demand access")
queue_filter = Param.Bool(True, "Don't queue redundant prefetches")
cache_snoop = Param.Bool(False, "Snoop cache to eliminate redundant request")
tag_prefetch = Param.Bool(True, "Tag prefetch with PC of generating access")
class StridePrefetcher(QueuedPrefetcher):
type = 'StridePrefetcher'
cxx_class = 'StridePrefetcher'
cxx_header = "mem/cache/prefetch/stride.hh"
class TaggedPrefetcher(BasePrefetcher):
max_conf = Param.Int(7, "Maximum confidence level")
thresh_conf = Param.Int(4, "Threshold confidence level")
min_conf = Param.Int(0, "Minimum confidence level")
start_conf = Param.Int(4, "Starting confidence for new entries")
table_sets = Param.Int(16, "Number of sets in PC lookup table")
table_assoc = Param.Int(4, "Associativity of PC lookup table")
use_master_id = Param.Bool(True, "Use master id based history")
degree = Param.Int(4, "Number of prefetches to generate")
class TaggedPrefetcher(QueuedPrefetcher):
type = 'TaggedPrefetcher'
cxx_class = 'TaggedPrefetcher'
cxx_header = "mem/cache/prefetch/tagged.hh"
degree = Param.Int(2, "Number of prefetches to generate")

View File

@@ -33,6 +33,7 @@ Import('*')
SimObject('Prefetcher.py')
Source('base.cc')
Source('queued.cc')
Source('stride.cc')
Source('tagged.cc')

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 ARM Limited
* Copyright (c) 2013-2014 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -38,6 +38,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Ron Dreslinski
* Mitch Hayenga
*/
/**
@@ -47,20 +48,14 @@
#include <list>
#include "base/trace.hh"
#include "debug/HWPrefetch.hh"
#include "mem/cache/prefetch/base.hh"
#include "mem/cache/base.hh"
#include "mem/request.hh"
#include "sim/system.hh"
BasePrefetcher::BasePrefetcher(const Params *p)
: ClockedObject(p), size(p->size), cache(nullptr), blkSize(0),
latency(p->latency), degree(p->degree),
useMasterId(p->use_master_id), pageStop(!p->cross_pages),
serialSquash(p->serial_squash), onlyData(p->data_accesses_only),
onMissOnly(p->on_miss_only), onReadOnly(p->on_read_only),
onPrefetch(p->on_prefetch), system(p->sys),
BasePrefetcher::BasePrefetcher(const BasePrefetcherParams *p)
: ClockedObject(p), cache(nullptr), blkSize(0), system(p->sys),
onMiss(p->on_miss), onRead(p->on_read),
onWrite(p->on_write), onData(p->on_data), onInst(p->on_inst),
masterId(system->getMasterId(name())),
pageBytes(system->getPageBytes())
{
@@ -77,239 +72,52 @@ BasePrefetcher::setCache(BaseCache *_cache)
void
BasePrefetcher::regStats()
{
pfIdentified
.name(name() + ".prefetcher.num_hwpf_identified")
.desc("number of hwpf identified")
;
pfMSHRHit
.name(name() + ".prefetcher.num_hwpf_already_in_mshr")
.desc("number of hwpf that were already in mshr")
;
pfCacheHit
.name(name() + ".prefetcher.num_hwpf_already_in_cache")
.desc("number of hwpf that were already in the cache")
;
pfBufferHit
.name(name() + ".prefetcher.num_hwpf_already_in_prefetcher")
.desc("number of hwpf that were already in the prefetch queue")
;
pfRemovedFull
.name(name() + ".prefetcher.num_hwpf_evicted")
.desc("number of hwpf removed due to no buffer left")
;
pfRemovedMSHR
.name(name() + ".prefetcher.num_hwpf_removed_MSHR_hit")
.desc("number of hwpf removed because MSHR allocated")
;
pfIssued
.name(name() + ".prefetcher.num_hwpf_issued")
.name(name() + ".num_hwpf_issued")
.desc("number of hwpf issued")
;
pfSpanPage
.name(name() + ".prefetcher.num_hwpf_span_page")
.desc("number of hwpf spanning a virtual page")
;
pfSquashed
.name(name() + ".prefetcher.num_hwpf_squashed_from_miss")
.desc("number of hwpf that got squashed due to a miss "
"aborting calculation time")
;
}
inline bool
BasePrefetcher::inCache(Addr addr, bool is_secure)
bool
BasePrefetcher::observeAccess(const PacketPtr &pkt) const
{
Addr addr = pkt->getAddr();
bool fetch = pkt->req->isInstFetch();
bool read= pkt->isRead();
bool is_secure = pkt->isSecure();
if (pkt->req->isUncacheable()) return false;
if (fetch && !onInst) return false;
if (!fetch && !onData) return false;
if (!fetch && read && !onRead) return false;
if (!fetch && !read && !onWrite) return false;
if (onMiss) {
return !inCache(addr, is_secure) &&
!inMissQueue(addr, is_secure);
}
return true;
}
bool
BasePrefetcher::inCache(Addr addr, bool is_secure) const
{
if (cache->inCache(addr, is_secure)) {
pfCacheHit++;
return true;
}
return false;
}
inline bool
BasePrefetcher::inMissQueue(Addr addr, bool is_secure)
bool
BasePrefetcher::inMissQueue(Addr addr, bool is_secure) const
{
if (cache->inMissQueue(addr, is_secure)) {
pfMSHRHit++;
return true;
}
return false;
}
PacketPtr
BasePrefetcher::getPacket()
{
DPRINTF(HWPrefetch, "Requesting a hw_pf to issue\n");
if (pf.empty()) {
DPRINTF(HWPrefetch, "No HW_PF found\n");
return NULL;
}
PacketPtr pkt = pf.begin()->pkt;
while (!pf.empty()) {
pkt = pf.begin()->pkt;
pf.pop_front();
Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
bool is_secure = pkt->isSecure();
if (!inCache(blk_addr, is_secure) && !inMissQueue(blk_addr, is_secure))
// we found a prefetch, return it
break;
DPRINTF(HWPrefetch, "addr 0x%x (%s) in cache, skipping\n",
pkt->getAddr(), is_secure ? "s" : "ns");
delete pkt->req;
delete pkt;
if (pf.empty()) {
cache->deassertMemSideBusRequest(BaseCache::Request_PF);
return NULL; // None left, all were in cache
}
}
pfIssued++;
assert(pkt != NULL);
DPRINTF(HWPrefetch, "returning 0x%x (%s)\n", pkt->getAddr(),
pkt->isSecure() ? "s" : "ns");
return pkt;
}
Tick
BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
{
// Don't consult the prefetcher if any of the following conditons are true
// 1) The request is uncacheable
// 2) The request is a fetch, but we are only prefeching data
// 3) The request is a cache hit, but we are only training on misses
// 4) THe request is a write, but we are only training on reads
if (!pkt->req->isUncacheable() && !(pkt->req->isInstFetch() && onlyData) &&
!(onMissOnly && inCache(pkt->getAddr(), true)) &&
!(onReadOnly && !pkt->isRead())) {
// Calculate the blk address
Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
bool is_secure = pkt->isSecure();
// Check if miss is in pfq, if so remove it
std::list<DeferredPacket>::iterator iter = inPrefetch(blk_addr,
is_secure);
if (iter != pf.end()) {
DPRINTF(HWPrefetch, "Saw a miss to a queued prefetch addr: "
"0x%x (%s), removing it\n", blk_addr,
is_secure ? "s" : "ns");
pfRemovedMSHR++;
delete iter->pkt->req;
delete iter->pkt;
iter = pf.erase(iter);
if (pf.empty())
cache->deassertMemSideBusRequest(BaseCache::Request_PF);
}
// Remove anything in queue with delay older than time
// since everything is inserted in time order, start from end
// and work until pf.empty() or time is earlier
// This is done to emulate Aborting the previous work on a new miss
// Needed for serial calculators like GHB
if (serialSquash) {
iter = pf.end();
if (iter != pf.begin())
iter--;
while (!pf.empty() && iter->tick >= tick) {
pfSquashed++;
DPRINTF(HWPrefetch, "Squashing old prefetch addr: 0x%x\n",
iter->pkt->getAddr());
delete iter->pkt->req;
delete iter->pkt;
iter = pf.erase(iter);
if (iter != pf.begin())
iter--;
}
if (pf.empty())
cache->deassertMemSideBusRequest(BaseCache::Request_PF);
}
std::list<Addr> addresses;
std::list<Cycles> delays;
calculatePrefetch(pkt, addresses, delays);
std::list<Addr>::iterator addrIter = addresses.begin();
std::list<Cycles>::iterator delayIter = delays.begin();
for (; addrIter != addresses.end(); ++addrIter, ++delayIter) {
Addr addr = *addrIter;
pfIdentified++;
DPRINTF(HWPrefetch, "Found a pf candidate addr: 0x%x, "
"inserting into prefetch queue with delay %d time %d\n",
addr, *delayIter, time);
// Check if it is already in the pf buffer
if (inPrefetch(addr, is_secure) != pf.end()) {
pfBufferHit++;
DPRINTF(HWPrefetch, "Prefetch addr already in pf buffer\n");
continue;
}
// create a prefetch memreq
Request *prefetchReq = new Request(*addrIter, blkSize, 0, masterId);
if (is_secure)
prefetchReq->setFlags(Request::SECURE);
prefetchReq->taskId(ContextSwitchTaskId::Prefetcher);
PacketPtr prefetch =
new Packet(prefetchReq, MemCmd::HardPFReq);
prefetch->allocate();
prefetch->req->setThreadContext(pkt->req->contextId(),
pkt->req->threadId());
// Tag orefetch reqeuests with corresponding PC to train lower
// cache-level prefetchers
if (onPrefetch && pkt->req->hasPC())
prefetch->req->setPC(pkt->req->getPC());
// We just remove the head if we are full
if (pf.size() == size) {
pfRemovedFull++;
PacketPtr old_pkt = pf.begin()->pkt;
DPRINTF(HWPrefetch, "Prefetch queue full, "
"removing oldest 0x%x\n", old_pkt->getAddr());
delete old_pkt->req;
delete old_pkt;
pf.pop_front();
}
pf.push_back(DeferredPacket(tick + clockPeriod() * *delayIter,
prefetch));
}
}
return pf.empty() ? 0 : pf.front().tick;
}
std::list<BasePrefetcher::DeferredPacket>::iterator
BasePrefetcher::inPrefetch(Addr address, bool is_secure)
{
// Guaranteed to only be one match, we always check before inserting
std::list<DeferredPacket>::iterator iter;
for (iter = pf.begin(); iter != pf.end(); iter++) {
if (((*iter).pkt->getAddr() & ~(Addr)(blkSize-1)) == address &&
(*iter).pkt->isSecure() == is_secure) {
return iter;
}
}
return pf.end();
}
bool
BasePrefetcher::samePage(Addr a, Addr b) const
{

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 ARM Limited
* Copyright (c) 2013-2014 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -38,6 +38,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Ron Dreslinski
* Mitch Hayenga
*/
/**
@@ -45,14 +46,12 @@
* Miss and writeback queue declarations.
*/
#ifndef __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
#define __MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
#include <list>
#ifndef __MEM_CACHE_PREFETCH_BASE_HH__
#define __MEM_CACHE_PREFETCH_BASE_HH__
#include "base/statistics.hh"
#include "mem/packet.hh"
#include "params/BaseCache.hh"
#include "params/BasePrefetcher.hh"
#include "sim/clocked_object.hh"
class BaseCache;
@@ -61,82 +60,54 @@ class BasePrefetcher : public ClockedObject
{
protected:
/** A deferred packet, buffered to transmit later. */
class DeferredPacket {
public:
Tick tick; ///< The tick when the packet is ready to transmit
PacketPtr pkt; ///< Pointer to the packet to transmit
DeferredPacket(Tick t, PacketPtr p)
: tick(t), pkt(p)
{}
};
/** The Prefetch Queue. */
std::list<DeferredPacket> pf;
// PARAMETERS
/** The number of MSHRs in the Prefetch Queue. */
const unsigned size;
/** Pointr to the parent cache. */
BaseCache* cache;
/** The block size of the parent cache. */
unsigned blkSize;
/** The latency before a prefetch is issued */
const Cycles latency;
/** The number of prefetches to issue */
const unsigned degree;
/** If patterns should be found per context id */
const bool useMasterId;
/** Do we prefetch across page boundaries. */
const bool pageStop;
/** Do we remove prefetches with later times than a new miss.*/
const bool serialSquash;
/** Do we prefetch on only data reads, or on inst reads as well. */
const bool onlyData;
/** Do we trigger/train prefetch on cache misses only, or all accesses. */
const bool onMissOnly;
/** Do we trigger/train prefetch on reads only, or all accesses. */
const bool onReadOnly;
/** Do we tag prefetch's with PC addresses, allowing lower pc-based
prefetchers to prefetch on prefetch requests */
const bool onPrefetch;
/** System we belong to */
System* system;
/** Only consult prefetcher on cache misses? */
bool onMiss;
/** Consult prefetcher on reads? */
bool onRead;
/** Consult prefetcher on reads? */
bool onWrite;
/** Consult prefetcher on data accesses? */
bool onData;
/** Consult prefetcher on instruction accesses? */
bool onInst;
/** Request id for prefetches */
MasterID masterId;
const Addr pageBytes;
public:
/** Determine if this access should be observed */
bool observeAccess(const PacketPtr &pkt) const;
/** Determine if address is in cache */
bool inCache(Addr addr, bool is_secure) const;
/** Determine if address is in cache miss queue */
bool inMissQueue(Addr addr, bool is_secure) const;
/** Determine if addresses are on the same page */
bool samePage(Addr a, Addr b) const;
Stats::Scalar pfIdentified;
Stats::Scalar pfMSHRHit;
Stats::Scalar pfCacheHit;
Stats::Scalar pfBufferHit;
Stats::Scalar pfRemovedFull;
Stats::Scalar pfRemovedMSHR;
Stats::Scalar pfIssued;
Stats::Scalar pfSpanPage;
Stats::Scalar pfSquashed;
void regStats();
public:
typedef BasePrefetcherParams Params;
BasePrefetcher(const Params *p);
BasePrefetcher(const BasePrefetcherParams *p);
virtual ~BasePrefetcher() {}
@@ -145,42 +116,14 @@ class BasePrefetcher : public ClockedObject
/**
* Notify prefetcher of cache access (may be any access or just
* misses, depending on cache parameters.)
* @retval Time of next prefetch availability, or 0 if none.
* @retval Time of next prefetch availability, or MaxTick if none.
*/
Tick notify(PacketPtr &pkt, Tick tick);
virtual Tick notify(const PacketPtr &pkt) = 0;
bool inCache(Addr addr, bool is_secure);
virtual PacketPtr getPacket() = 0;
bool inMissQueue(Addr addr, bool is_secure);
PacketPtr getPacket();
bool havePending()
{
return !pf.empty();
}
Tick nextPrefetchReadyTime()
{
return pf.empty() ? MaxTick : pf.front().tick;
}
virtual void calculatePrefetch(PacketPtr &pkt,
std::list<Addr> &addresses,
std::list<Cycles> &delays) = 0;
std::list<DeferredPacket>::iterator inPrefetch(Addr address, bool is_secure);
/**
* Utility function: are addresses a and b on the same VM page?
*/
bool samePage(Addr a, Addr b) const;
public:
const Params*
params() const
{
return dynamic_cast<const Params *>(_params);
}
virtual Tick nextPrefetchReadyTime() const = 0;
virtual void regStats();
};
#endif //__MEM_CACHE_PREFETCH_BASE_PREFETCHER_HH__
#endif //__MEM_CACHE_PREFETCH_BASE_HH__

213
src/mem/cache/prefetch/queued.cc vendored Normal file
View File

@@ -0,0 +1,213 @@
/*
* Copyright (c) 2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Mitch Hayenga
*/
#include "debug/HWPrefetch.hh"
#include "mem/cache/prefetch/queued.hh"
#include "mem/cache/base.hh"
// Construct a queued prefetcher, copying all queue-tuning knobs
// (size, issue latency, squash/filter/snoop/tag-prefetch policies)
// out of the Python-generated params object.
QueuedPrefetcher::QueuedPrefetcher(const QueuedPrefetcherParams *p)
    : BasePrefetcher(p), queueSize(p->queue_size), latency(p->latency),
      queueSquash(p->queue_squash), queueFilter(p->queue_filter),
      cacheSnoop(p->cache_snoop), tagPrefetch(p->tag_prefetch)
{
}
// Destructor: any prefetches still queued own both a Packet and its
// Request, so both must be freed explicitly before the list goes away.
QueuedPrefetcher::~QueuedPrefetcher()
{
    while (!pfq.empty()) {
        DeferredPacket &dp = pfq.front();
        delete dp.pkt->req;
        delete dp.pkt;
        pfq.pop_front();
    }
}
/**
 * Notify the prefetcher of a demand access.
 *
 * If the access is one this prefetcher observes (per observeAccess),
 * optionally squash queued prefetches to the same block, then ask the
 * concrete prefetcher (calculatePrefetch) for candidate addresses and
 * enqueue them subject to the filter/snoop/capacity policies.
 *
 * @param pkt The demand access packet (not owned by the prefetcher).
 * @return Tick at which the next queued prefetch is ready, or MaxTick
 *         if the queue is empty.
 */
Tick
QueuedPrefetcher::notify(const PacketPtr &pkt)
{
    // Verify this access type is observed by prefetcher
    if (observeAccess(pkt)) {
        Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize - 1);
        bool is_secure = pkt->isSecure();

        // Squash queued prefetches if demand miss to same line
        if (queueSquash) {
            auto itr = pfq.begin();
            while (itr != pfq.end()) {
                if (itr->pkt->getAddr() == blk_addr &&
                    itr->pkt->isSecure() == is_secure) {
                    // Queued packets are owned here; free both levels
                    delete itr->pkt->req;
                    delete itr->pkt;
                    itr = pfq.erase(itr);
                } else {
                    ++itr;
                }
            }

            if (pfq.empty())
                cache->deassertMemSideBusRequest(BaseCache::Request_PF);
        }

        // Calculate prefetches given this access
        std::vector<Addr> addresses;
        calculatePrefetch(pkt, addresses);

        // Queue up generated prefetches
        for (Addr pf_addr : addresses) {
            // Block align prefetch address
            pf_addr = pf_addr & ~(Addr)(blkSize - 1);

            pfIdentified++;
            DPRINTF(HWPrefetch, "Found a pf candidate addr: %#x, "
                    "inserting into prefetch queue.\n", pf_addr);

            // Skip candidates already sitting in the queue
            if (queueFilter && inPrefetch(pf_addr, is_secure)) {
                pfBufferHit++;
                DPRINTF(HWPrefetch, "Prefetch addr already in "
                        "prefetch queue\n");
                continue;
            }

            // Optionally peek at cache/MSHR state to drop useless work
            if (cacheSnoop && (inCache(pf_addr, is_secure) ||
                        inMissQueue(pf_addr, is_secure))) {
                pfInCache++;
                DPRINTF(HWPrefetch, "Dropping redundant in "
                        "cache/MSHR prefetch addr:%#x\n", pf_addr);
                continue;
            }

            // Create a prefetch memory request
            Request *pf_req =
                new Request(pf_addr, blkSize, 0, masterId);

            if (is_secure) {
                pf_req->setFlags(Request::SECURE);
            }

            pf_req->taskId(ContextSwitchTaskId::Prefetcher);
            PacketPtr pf_pkt = new Packet(pf_req, MemCmd::HardPFReq);
            pf_pkt->allocate();

            // Propagate the triggering context, if any, onto the prefetch
            if (pkt->req->hasContextId()) {
                pf_req->setThreadContext(pkt->req->contextId(),
                                         pkt->req->threadId());
            }

            if (tagPrefetch && pkt->req->hasPC()) {
                // Tag prefetch packet with accessing pc
                pf_pkt->req->setPC(pkt->req->getPC());
            }

            // Verify prefetch buffer space for request; drop the oldest
            // entry when full (FIFO replacement)
            if (pfq.size() == queueSize) {
                pfRemovedFull++;
                PacketPtr old_pkt = pfq.begin()->pkt;
                // Fixed: this DPRINTF was missing its trailing newline,
                // unlike every other trace message in this function
                DPRINTF(HWPrefetch, "Prefetch queue full, removing "
                        "oldest packet addr: %#x\n", old_pkt->getAddr());
                delete old_pkt->req;
                delete old_pkt;
                pfq.pop_front();
            }

            // Prefetch becomes issuable only after the configured latency
            Tick pf_time = curTick() + clockPeriod() * latency;
            DPRINTF(HWPrefetch, "Prefetch queued. "
                    "addr:%#x tick:%lld.\n", pf_addr, pf_time);

            pfq.push_back(DeferredPacket(pf_time, pf_pkt));
        }
    }

    return pfq.empty() ? MaxTick : pfq.front().tick;
}
// Hand the oldest queued prefetch to the cache, or NULL if nothing is
// pending. Ownership of the returned packet transfers to the caller.
PacketPtr
QueuedPrefetcher::getPacket()
{
    DPRINTF(HWPrefetch, "Requesting a prefetch to issue.\n");

    if (pfq.empty()) {
        DPRINTF(HWPrefetch, "No hardware prefetches available.\n");
        return NULL;
    }

    // Dequeue the head entry and account for the issued prefetch.
    DeferredPacket head = pfq.front();
    pfq.pop_front();

    PacketPtr pkt = head.pkt;
    assert(pkt != NULL);
    pfIssued++;

    DPRINTF(HWPrefetch, "Generating prefetch for %#x.\n", pkt->getAddr());
    return pkt;
}
// Report whether a prefetch for the given (block-aligned) address and
// security state is already sitting in the queue.
bool
QueuedPrefetcher::inPrefetch(Addr address, bool is_secure) const
{
    for (auto it = pfq.cbegin(); it != pfq.cend(); ++it) {
        if (it->pkt->getAddr() == address &&
            it->pkt->isSecure() == is_secure) {
            return true;
        }
    }

    return false;
}
// Register the queue-specific statistics on top of the base-class set.
void
QueuedPrefetcher::regStats()
{
    BasePrefetcher::regStats();

    pfIdentified.name(name() + ".pfIdentified");
    pfIdentified.desc("number of prefetch candidates identified");

    pfBufferHit.name(name() + ".pfBufferHit");
    pfBufferHit.desc("number of redundant prefetches already in prefetch queue");

    pfInCache.name(name() + ".pfInCache");
    pfInCache.desc("number of redundant prefetches already in cache/mshr dropped");

    pfRemovedFull.name(name() + ".pfRemovedFull");
    pfRemovedFull.desc("number of prefetches dropped due to prefetch queue size");

    pfSpanPage.name(name() + ".pfSpanPage");
    pfSpanPage.desc("number of prefetches not generated due to page crossing");
}

108
src/mem/cache/prefetch/queued.hh vendored Normal file
View File

@@ -0,0 +1,108 @@
/*
* Copyright (c) 2014 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Mitch Hayenga
*/
#ifndef __MEM_CACHE_PREFETCH_QUEUED_HH__
#define __MEM_CACHE_PREFETCH_QUEUED_HH__
#include <list>
#include "mem/cache/prefetch/base.hh"
#include "params/QueuedPrefetcher.hh"
/**
 * Base class for prefetchers that buffer generated prefetches in a
 * FIFO queue until the cache is ready to issue them. Concrete
 * subclasses only implement calculatePrefetch() to produce candidate
 * addresses; queuing, filtering, squashing and issue are handled here.
 */
class QueuedPrefetcher : public BasePrefetcher
{
  protected:
    /** A prefetch packet paired with the tick at which it may issue. */
    struct DeferredPacket {
        Tick tick;
        PacketPtr pkt;
        DeferredPacket(Tick t, PacketPtr p) : tick(t), pkt(p) {}
    };

    /** FIFO of pending prefetches; entries own their packet/request. */
    std::list<DeferredPacket> pfq;

    // PARAMETERS

    /** Maximum size of the prefetch queue */
    const unsigned queueSize;

    /** Cycles after generation when a prefetch can first be issued */
    const Cycles latency;

    /** Squash queued prefetch if demand access observed */
    const bool queueSquash;

    /** Filter prefetches if already queued */
    const bool queueFilter;

    /** Snoop the cache before generating prefetch (cheating basically) */
    const bool cacheSnoop;

    /** Tag prefetch with PC of generating access? */
    const bool tagPrefetch;

    /** Is a prefetch for this (block) address/security state queued? */
    bool inPrefetch(Addr address, bool is_secure) const;

    // STATS
    Stats::Scalar pfIdentified;
    Stats::Scalar pfBufferHit;
    Stats::Scalar pfInCache;
    Stats::Scalar pfRemovedFull;
    Stats::Scalar pfSpanPage;

  public:
    QueuedPrefetcher(const QueuedPrefetcherParams *p);
    virtual ~QueuedPrefetcher();

    /** Observe an access; returns ready time of next queued prefetch. */
    Tick notify(const PacketPtr &pkt);

    // Note: This should really be pure virtual, but doesn't go well with
    // params
    /** Produce candidate prefetch addresses for an observed access. */
    virtual void calculatePrefetch(const PacketPtr &pkt,
                                   std::vector<Addr> &addresses) = 0;

    /** Pop and return the oldest queued prefetch (NULL if none). */
    PacketPtr getPacket();

    /** Tick of the next issuable prefetch, or MaxTick if queue empty. */
    Tick nextPrefetchReadyTime() const
    {
        return pfq.empty() ? MaxTick : pfq.front().tick;
    }

    void regStats();
};
#endif //__MEM_CACHE_PREFETCH_QUEUED_HH__

View File

@@ -46,133 +46,162 @@
* Stride Prefetcher template instantiations.
*/
#include "base/trace.hh"
#include "debug/HWPrefetch.hh"
#include "mem/cache/prefetch/stride.hh"
StridePrefetcher::StridePrefetcher(const StridePrefetcherParams *p)
: QueuedPrefetcher(p),
maxConf(p->max_conf),
threshConf(p->thresh_conf),
minConf(p->min_conf),
startConf(p->start_conf),
pcTableAssoc(p->table_assoc),
pcTableSets(p->table_sets),
useMasterId(p->use_master_id),
degree(p->degree)
{
// Don't consult stride prefetcher on instruction accesses
onInst = false;
assert(isPowerOf2(pcTableSets));
for (int c = 0; c < maxContexts; c++) {
pcTable[c] = new StrideEntry*[pcTableSets];
for (int s = 0; s < pcTableSets; s++) {
pcTable[c][s] = new StrideEntry[pcTableAssoc];
}
}
}
StridePrefetcher::~StridePrefetcher()
{
for (int c = 0; c < maxContexts; c++) {
for (int s = 0; s < pcTableSets; s++) {
delete[] pcTable[c][s];
}
}
}
void
StridePrefetcher::calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Cycles> &delays)
StridePrefetcher::calculatePrefetch(const PacketPtr &pkt,
std::vector<Addr> &addresses)
{
if (!pkt->req->hasPC()) {
DPRINTF(HWPrefetch, "ignoring request with no PC");
DPRINTF(HWPrefetch, "Ignoring request with no PC.\n");
return;
}
Addr data_addr = pkt->getAddr();
// Get required packet info
Addr pkt_addr = pkt->getAddr();
Addr pc = pkt->req->getPC();
bool is_secure = pkt->isSecure();
MasterID master_id = useMasterId ? pkt->req->masterId() : 0;
Addr pc = pkt->req->getPC();
assert(master_id < Max_Contexts);
std::list<StrideEntry*> &tab = table[master_id];
// Revert to simple N-block ahead prefetch for instruction fetches
if (instTagged && pkt->req->isInstFetch()) {
for (int d = 1; d <= degree; d++) {
Addr new_addr = data_addr + d * blkSize;
if (pageStop && !samePage(data_addr, new_addr)) {
// Spanned the page, so now stop
pfSpanPage += degree - d + 1;
return;
}
DPRINTF(HWPrefetch, "queuing prefetch to %x @ %d\n",
new_addr, latency);
addresses.push_back(new_addr);
delays.push_back(latency);
}
return;
}
assert(master_id < maxContexts);
/* Scan Table for instAddr Match */
std::list<StrideEntry*>::iterator iter;
for (iter = tab.begin(); iter != tab.end(); iter++) {
// Entries have to match on the security state as well
if ((*iter)->instAddr == pc && (*iter)->isSecure == is_secure)
break;
}
// Lookup pc-based information
StrideEntry *entry;
if (iter != tab.end()) {
if(pcTableHit(pc, is_secure, master_id, entry)) {
// Hit in table
int new_stride = pkt_addr - entry->lastAddr;
bool stride_match = (new_stride == entry->stride);
int new_stride = data_addr - (*iter)->missAddr;
bool stride_match = (new_stride == (*iter)->stride);
// Adjust confidence for stride entry
if (stride_match && new_stride != 0) {
(*iter)->tolerance = true;
if ((*iter)->confidence < Max_Conf)
(*iter)->confidence++;
if (entry->confidence < maxConf)
entry->confidence++;
} else {
if (!((*iter)->tolerance)) {
(*iter)->stride = new_stride;
if ((*iter)->confidence > Min_Conf)
(*iter)->confidence = 0;
} else {
(*iter)->tolerance = false;
}
if (entry->confidence > minConf)
entry->confidence--;
// If confidence has dropped below the threshold, train new stride
if (entry->confidence < threshConf)
entry->stride = new_stride;
}
DPRINTF(HWPrefetch, "hit: PC %x data_addr %x (%s) stride %d (%s), "
"conf %d\n", pc, data_addr, is_secure ? "s" : "ns", new_stride,
DPRINTF(HWPrefetch, "Hit: PC %x pkt_addr %x (%s) stride %d (%s), "
"conf %d\n", pc, pkt_addr, is_secure ? "s" : "ns", new_stride,
stride_match ? "match" : "change",
(*iter)->confidence);
entry->confidence);
(*iter)->missAddr = data_addr;
(*iter)->isSecure = is_secure;
entry->lastAddr = pkt_addr;
if ((*iter)->confidence <= 0)
// Abort prefetch generation if below confidence threshold
if (entry->confidence < threshConf)
return;
// Generate up to degree prefetches
for (int d = 1; d <= degree; d++) {
Addr new_addr = data_addr + d * (*iter)->stride;
if (pageStop && !samePage(data_addr, new_addr)) {
// Spanned the page, so now stop
pfSpanPage += degree - d + 1;
return;
} else {
DPRINTF(HWPrefetch, " queuing prefetch to %x (%s) @ %d\n",
new_addr, is_secure ? "s" : "ns", latency);
// Round strides up to atleast 1 cacheline
int prefetch_stride = new_stride;
if (abs(new_stride) < blkSize) {
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
}
Addr new_addr = pkt_addr + d * prefetch_stride;
if (samePage(pkt_addr, new_addr)) {
DPRINTF(HWPrefetch, "Queuing prefetch to %#x.\n", new_addr);
addresses.push_back(new_addr);
delays.push_back(latency);
} else {
// Record the number of page crossing prefetches generated
pfSpanPage += degree - d + 1;
DPRINTF(HWPrefetch, "Ignoring page crossing prefetch.\n");
return;
}
}
} else {
// Miss in table
// Find lowest confidence and replace
DPRINTF(HWPrefetch, "miss: PC %x data_addr %x (%s)\n", pc, data_addr,
DPRINTF(HWPrefetch, "Miss: PC %x pkt_addr %x (%s)\n", pc, pkt_addr,
is_secure ? "s" : "ns");
if (tab.size() >= 256) { //set default table size is 256
std::list<StrideEntry*>::iterator min_pos = tab.begin();
int min_conf = (*min_pos)->confidence;
for (iter = min_pos, ++iter; iter != tab.end(); ++iter) {
if ((*iter)->confidence < min_conf){
min_pos = iter;
min_conf = (*iter)->confidence;
}
}
DPRINTF(HWPrefetch, " replacing PC %x (%s)\n",
(*min_pos)->instAddr, (*min_pos)->isSecure ? "s" : "ns");
// free entry and delete it
delete *min_pos;
tab.erase(min_pos);
}
StrideEntry *new_entry = new StrideEntry;
new_entry->instAddr = pc;
new_entry->missAddr = data_addr;
new_entry->isSecure = is_secure;
new_entry->stride = 0;
new_entry->confidence = 0;
new_entry->tolerance = false;
tab.push_back(new_entry);
StrideEntry* entry = pcTableVictim(pc, master_id);
entry->instAddr = pc;
entry->lastAddr = pkt_addr;
entry->isSecure= is_secure;
entry->stride = 0;
entry->confidence = startConf;
}
}
inline Addr
StridePrefetcher::pcHash(Addr pc) const
{
    // XOR-fold the PC into a set index. The low bit is dropped first
    // (presumably because instructions are at least 2-byte aligned —
    // confirm for the target ISA), then the bits above the index field
    // are folded onto it. The final mask assumes pcTableSets is a
    // power of two.
    Addr folded = pc >> 1;
    Addr upper = folded >> floorLog2(pcTableSets);
    return (folded ^ upper) & (Addr)(pcTableSets - 1);
}
inline StridePrefetcher::StrideEntry*
StridePrefetcher::pcTableVictim(Addr pc, int master_id)
{
    // Replacement policy: evict a randomly chosen way from the set
    // this PC maps to (random replacement for now).
    int victim_set = pcHash(pc);
    int victim_way = rand() % pcTableAssoc;

    DPRINTF(HWPrefetch, "Victimizing lookup table[%d][%d].\n", victim_set,
            victim_way);
    return &pcTable[master_id][victim_set][victim_way];
}
inline bool
StridePrefetcher::pcTableHit(Addr pc, bool is_secure, int master_id,
                             StrideEntry* &entry)
{
    // Probe every way of the PC-indexed set; a hit must match both the
    // instruction address and the security state. On a hit, 'entry' is
    // set to point at the matching table entry.
    int set = pcHash(pc);
    StrideEntry *ways = pcTable[master_id][set];
    for (int w = 0; w < pcTableAssoc; w++) {
        StrideEntry &candidate = ways[w];
        if (candidate.instAddr != pc || candidate.isSecure != is_secure)
            continue;
        DPRINTF(HWPrefetch, "Lookup hit table[%d][%d].\n", set, w);
        entry = &candidate;
        return true;
    }
    return false;
}
StridePrefetcher*
StridePrefetcherParams::create()
{
    // Factory hook used by the parameter framework to instantiate the
    // prefetcher from its params object.
    // Fix: the original span carried a duplicated (unreachable) return
    // statement left over from diff interleaving.
    return new StridePrefetcher(this);
}

View File

@@ -45,51 +45,54 @@
* Describes a strided prefetcher.
*/
#ifndef __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
#define __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
#ifndef __MEM_CACHE_PREFETCH_STRIDE_HH__
#define __MEM_CACHE_PREFETCH_STRIDE_HH__
#include <climits>
#include "mem/cache/prefetch/base.hh"
#include "mem/cache/prefetch/queued.hh"
#include "params/StridePrefetcher.hh"
class StridePrefetcher : public BasePrefetcher
class StridePrefetcher : public QueuedPrefetcher
{
protected:
static const int maxContexts = 64;
static const int Max_Contexts = 64;
const int maxConf;
const int threshConf;
const int minConf;
const int startConf;
// These constants need to be changed with the type of the
// 'confidence' field below.
static const int Max_Conf = INT_MAX;
static const int Min_Conf = INT_MIN;
const int pcTableAssoc;
const int pcTableSets;
class StrideEntry
const bool useMasterId;
const int degree;
struct StrideEntry
{
public:
StrideEntry() : instAddr(0), lastAddr(0), isSecure(false), stride(0),
confidence(0)
{ }
Addr instAddr;
Addr missAddr;
Addr lastAddr;
bool isSecure;
int stride;
int confidence;
bool tolerance;
};
std::list<StrideEntry*> table[Max_Contexts];
StrideEntry **pcTable[maxContexts];
bool instTagged;
bool pcTableHit(Addr pc, bool is_secure, int master_id, StrideEntry* &entry);
StrideEntry* pcTableVictim(Addr pc, int master_id);
Addr pcHash(Addr pc) const;
public:
StridePrefetcher(const Params *p)
: BasePrefetcher(p), instTagged(p->inst_tagged)
{
}
StridePrefetcher(const StridePrefetcherParams *p);
~StridePrefetcher();
~StridePrefetcher() {}
void calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Cycles> &delays);
void calculatePrefetch(const PacketPtr &pkt, std::vector<Addr> &addresses);
};
#endif // __MEM_CACHE_PREFETCH_STRIDE_PREFETCHER_HH__
#endif // __MEM_CACHE_PREFETCH_STRIDE_HH__

View File

@@ -35,32 +35,30 @@
#include "mem/cache/prefetch/tagged.hh"
// Construct from the generated params object; 'degree' is the number
// of sequential blocks prefetched per trigger. Fix: the original span
// interleaved the old BasePrefetcher-based constructor signature with
// the new QueuedPrefetcher-based one; only the new one is kept.
TaggedPrefetcher::TaggedPrefetcher(const TaggedPrefetcherParams *p)
    : QueuedPrefetcher(p), degree(p->degree)
{
}
void
TaggedPrefetcher::
calculatePrefetch(PacketPtr &pkt, std::list<Addr> &addresses,
std::list<Cycles> &delays)
TaggedPrefetcher::calculatePrefetch(const PacketPtr &pkt,
std::vector<Addr> &addresses)
{
Addr blkAddr = pkt->getAddr() & ~(Addr)(blkSize-1);
for (int d = 1; d <= degree; d++) {
Addr newAddr = blkAddr + d*(blkSize);
if (pageStop && !samePage(blkAddr, newAddr)) {
// Spanned the page, so now stop
if (!samePage(blkAddr, newAddr)) {
// Count number of unissued prefetches due to page crossing
pfSpanPage += degree - d + 1;
return;
} else {
addresses.push_back(newAddr);
delays.push_back(latency);
}
}
}
TaggedPrefetcher*
TaggedPrefetcherParams::create()
{

View File

@@ -33,23 +33,24 @@
* Describes a tagged prefetcher.
*/
#ifndef __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
#define __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
#ifndef __MEM_CACHE_PREFETCH_TAGGED_HH__
#define __MEM_CACHE_PREFETCH_TAGGED_HH__
#include "mem/cache/prefetch/base.hh"
#include "mem/cache/prefetch/queued.hh"
#include "params/TaggedPrefetcher.hh"
// Simple next-line ("tagged") prefetcher: on notification it queues the
// 'degree' cache blocks that sequentially follow the accessed block.
// Fix: the original span interleaved old BasePrefetcher-based and new
// QueuedPrefetcher-based declarations; only the new class body is kept.
class TaggedPrefetcher : public QueuedPrefetcher
{
  protected:
    // Number of sequential blocks to prefetch per trigger.
    int degree;

  public:
    TaggedPrefetcher(const TaggedPrefetcherParams *p);
    ~TaggedPrefetcher() {}

    void calculatePrefetch(const PacketPtr &pkt, std::vector<Addr> &addresses);
};
#endif // __MEM_CACHE_PREFETCH_TAGGED_PREFETCHER_HH__
#endif // __MEM_CACHE_PREFETCH_TAGGED_HH__