mem-cache: Access Map Pattern Matching Prefetcher

Implementation of the Access Map Pattern Matching prefetcher
Based in the description of the following paper:
  Access map pattern matching for high performance data cache prefetch.
  Ishii, Y., Inaba, M., & Hiraki, K. (2011).
  Journal of Instruction-Level Parallelism, 13, 1-24.

Change-Id: I0d4b7f7afc2ab4938bdd8755bfed26e26a28530c
Reviewed-on: https://gem5-review.googlesource.com/c/15096
Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Maintainer: Nikos Nikoleris <nikos.nikoleris@arm.com>
This commit is contained in:
Javier Bueno
2018-12-13 12:08:35 +01:00
committed by Javier Bueno Hedo
parent 3a3ad05aca
commit 6099502678
4 changed files with 469 additions and 0 deletions

View File

@@ -179,3 +179,37 @@ class SignaturePathPrefetcher(QueuedPrefetcher):
"Minimum confidence to issue prefetches")
lookahead_confidence_threshold = Param.Float(0.75,
"Minimum confidence to continue exploring lookahead entries")
class AccessMapPatternMatchingPrefetcher(QueuedPrefetcher):
type = 'AccessMapPatternMatchingPrefetcher'
cxx_class = 'AccessMapPatternMatchingPrefetcher'
cxx_header = "mem/cache/prefetch/access_map_pattern_matching.hh"
start_degree = Param.Unsigned(4,
"Initial degree (Maximum number of prefetches generated")
hot_zone_size = Param.MemorySize("2kB", "Memory covered by a hot zone")
access_map_table_entries = Param.MemorySize("256",
"Number of entries in the access map table")
access_map_table_assoc = Param.Unsigned(8,
"Associativity of the access map table")
access_map_table_indexing_policy = Param.BaseIndexingPolicy(
SetAssociative(entry_size = 1, assoc = Parent.access_map_table_assoc,
size = Parent.access_map_table_entries),
"Indexing policy of the access map table")
access_map_table_replacement_policy = Param.BaseReplacementPolicy(LRURP(),
"Replacement policy of the access map table")
high_coverage_threshold = Param.Float(0.25,
"A prefetch coverage factor bigger than this is considered high")
low_coverage_threshold = Param.Float(0.125,
"A prefetch coverage factor smaller than this is considered low")
high_accuracy_threshold = Param.Float(0.5,
"A prefetch accuracy factor bigger than this is considered high")
low_accuracy_threshold = Param.Float(0.25,
"A prefetch accuracy factor smaller than this is considered low")
high_cache_hit_threshold = Param.Float(0.875,
"A cache hit ratio bigger than this is considered high")
low_cache_hit_threshold = Param.Float(0.75,
"A cache hit ratio smaller than this is considered low")
epoch_cycles = Param.Cycles(256000, "Cycles in an epoch period")
offchip_memory_latency = Param.Latency("30ns",
"Memory latency used to compute the required memory bandwidth")

View File

@@ -32,6 +32,7 @@ Import('*')
SimObject('Prefetcher.py')
Source('access_map_pattern_matching.cc')
Source('base.cc')
Source('queued.cc')
Source('signature_path.cc')

View File

@@ -0,0 +1,252 @@
/**
* Copyright (c) 2018 Metempsy Technology Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Javier Bueno
*/
#include "mem/cache/prefetch/access_map_pattern_matching.hh"
#include "debug/HWPrefetch.hh"
#include "mem/cache/prefetch/associative_set_impl.hh"
#include "params/AccessMapPatternMatchingPrefetcher.hh"
AccessMapPatternMatchingPrefetcher::AccessMapPatternMatchingPrefetcher(
const AccessMapPatternMatchingPrefetcherParams *p)
: QueuedPrefetcher(p),
startDegree(p->start_degree), hotZoneSize(p->hot_zone_size),
highCoverageThreshold(p->high_coverage_threshold),
lowCoverageThreshold(p->low_coverage_threshold),
highAccuracyThreshold(p->high_accuracy_threshold),
lowAccuracyThreshold(p->low_accuracy_threshold),
highCacheHitThreshold(p->high_cache_hit_threshold),
lowCacheHitThreshold(p->low_cache_hit_threshold),
epochCycles(p->epoch_cycles),
offChipMemoryLatency(p->offchip_memory_latency),
accessMapTable(p->access_map_table_assoc, p->access_map_table_entries,
p->access_map_table_indexing_policy,
p->access_map_table_replacement_policy,
AccessMapEntry(hotZoneSize / blkSize)),
numGoodPrefetches(0), numTotalPrefetches(0), numRawCacheMisses(0),
numRawCacheHits(0), degree(startDegree), usefulDegree(startDegree),
epochEvent([this]{ processEpochEvent(); }, name())
{
fatal_if(!isPowerOf2(hotZoneSize),
"the hot zone size must be a power of 2");
if (!epochEvent.scheduled()) {
schedule(epochEvent, clockEdge(epochCycles));
}
}
void
AccessMapPatternMatchingPrefetcher::processEpochEvent()
{
schedule(epochEvent, clockEdge(epochCycles));
double prefetch_accuracy =
((double) numGoodPrefetches) / ((double) numTotalPrefetches);
double prefetch_coverage =
((double) numGoodPrefetches) / ((double) numRawCacheMisses);
double cache_hit_ratio = ((double) numRawCacheHits) /
((double) (numRawCacheHits + numRawCacheMisses));
double num_requests = (double) (numRawCacheMisses - numGoodPrefetches +
numTotalPrefetches);
double memory_bandwidth = num_requests * offChipMemoryLatency /
clockEdge(epochCycles);
if (prefetch_coverage > highCoverageThreshold &&
(prefetch_accuracy > highAccuracyThreshold ||
cache_hit_ratio < lowCacheHitThreshold)) {
usefulDegree += 1;
} else if ((prefetch_coverage < lowCoverageThreshold &&
(prefetch_accuracy < lowAccuracyThreshold ||
cache_hit_ratio > highCacheHitThreshold)) ||
(prefetch_accuracy < lowAccuracyThreshold &&
cache_hit_ratio > highCacheHitThreshold)) {
usefulDegree -= 1;
}
degree = std::min((unsigned) memory_bandwidth, usefulDegree);
// reset epoch stats
numGoodPrefetches = 0.0;
numTotalPrefetches = 0.0;
numRawCacheMisses = 0.0;
numRawCacheHits = 0.0;
}
AccessMapPatternMatchingPrefetcher::AccessMapEntry *
AccessMapPatternMatchingPrefetcher::getAccessMapEntry(Addr am_addr,
bool is_secure)
{
AccessMapEntry *am_entry = accessMapTable.findEntry(am_addr, is_secure);
if (am_entry != nullptr) {
accessMapTable.accessEntry(am_entry);
} else {
am_entry = accessMapTable.findVictim(am_addr);
assert(am_entry != nullptr);
accessMapTable.insertEntry(am_addr, is_secure, am_entry);
}
return am_entry;
}
void
AccessMapPatternMatchingPrefetcher::setEntryState(AccessMapEntry &entry,
Addr block, enum AccessMapState state)
{
enum AccessMapState old = entry.states[block];
entry.states[block] = state;
//do not update stats when initializing
if (state == AM_INIT) return;
switch (old) {
case AM_INIT:
if (state == AM_PREFETCH) {
numTotalPrefetches += 1;
} else if (state == AM_ACCESS) {
numRawCacheMisses += 1;
}
break;
case AM_PREFETCH:
if (state == AM_ACCESS) {
numGoodPrefetches += 1;
numRawCacheMisses += 1;
}
break;
case AM_ACCESS:
if (state == AM_ACCESS) {
numRawCacheHits += 1;
}
break;
default:
panic("Impossible path\n");
break;
}
}
void
AccessMapPatternMatchingPrefetcher::calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses)
{
assert(addresses.empty());
bool is_secure = pfi.isSecure();
Addr am_addr = pfi.getAddr() / hotZoneSize;
Addr current_block = (pfi.getAddr() % hotZoneSize) / blkSize;
uint64_t lines_per_zone = hotZoneSize / blkSize;
// Get the entries of the curent block (am_addr), the previous, and the
// following ones
AccessMapEntry *am_entry_curr = getAccessMapEntry(am_addr, is_secure);
AccessMapEntry *am_entry_prev = (am_addr > 0) ?
getAccessMapEntry(am_addr-1, is_secure) : nullptr;
AccessMapEntry *am_entry_next = (am_addr < (MaxAddr/hotZoneSize)) ?
getAccessMapEntry(am_addr+1, is_secure) : nullptr;
assert(am_entry_curr != am_entry_prev);
assert(am_entry_curr != am_entry_next);
assert(am_entry_prev != am_entry_next);
assert(am_entry_curr != nullptr);
//Mark the current access as Accessed
setEntryState(*am_entry_curr, current_block, AM_ACCESS);
/**
* Create a contiguous copy of the 3 entries states.
* With this, we avoid doing boundaries checking in the loop that looks
* for prefetch candidates, mark out of range positions with AM_INVALID
*/
std::vector<AccessMapState> states(3 * lines_per_zone);
for (unsigned idx = 0; idx < lines_per_zone; idx += 1) {
states[idx] =
am_entry_prev != nullptr ? am_entry_prev->states[idx] : AM_INVALID;
states[idx + lines_per_zone] = am_entry_curr->states[idx];
states[idx + 2 * lines_per_zone] =
am_entry_next != nullptr ? am_entry_next->states[idx] : AM_INVALID;
}
/**
* am_entry_prev->states => states[ 0 .. lines_per_zone-1]
* am_entry_curr->states => states[ lines_per_zone .. 2*lines_per_zone-1]
* am_entry_next->states => states[2*lines_per_zone .. 3*lines_per_zone-1]
*/
// index of the current_block in the new vector
Addr states_current_block = current_block + lines_per_zone;
// consider strides 1..lines_per_zone/2
for (int stride = 1; stride < lines_per_zone/2; stride += 1) {
// Test accessed positive strides
if (checkCandidate(states, states_current_block, stride)) {
// candidate found, current_block - stride
Addr pf_addr;
if (stride > current_block) {
// The index (current_block - stride) falls in the range of
// the previous zone (am_entry_prev), adjust the address
// accordingly
Addr blk = states_current_block - stride;
pf_addr = (am_addr - 1) * hotZoneSize + blk * blkSize;
setEntryState(*am_entry_prev, blk, AM_PREFETCH);
} else {
// The index (current_block - stride) falls within
// am_entry_curr
Addr blk = current_block - stride;
pf_addr = am_addr * hotZoneSize + blk * blkSize;
setEntryState(*am_entry_curr, blk, AM_PREFETCH);
}
addresses.push_back(AddrPriority(pf_addr, 0));
if (addresses.size() == degree) {
break;
}
}
// Test accessed negative strides
if (checkCandidate(states, states_current_block, -stride)) {
// candidate found, current_block + stride
Addr pf_addr;
if (current_block + stride >= lines_per_zone) {
// The index (current_block + stride) falls in the range of
// the next zone (am_entry_next), adjust the address
// accordingly
Addr blk = (states_current_block + stride) % lines_per_zone;
pf_addr = (am_addr + 1) * hotZoneSize + blk * blkSize;
setEntryState(*am_entry_next, blk, AM_PREFETCH);
} else {
// The index (current_block + stride) falls within
// am_entry_curr
Addr blk = current_block + stride;
pf_addr = am_addr * hotZoneSize + blk * blkSize;
setEntryState(*am_entry_curr, blk, AM_PREFETCH);
}
addresses.push_back(AddrPriority(pf_addr, 0));
if (addresses.size() == degree) {
break;
}
}
}
}
AccessMapPatternMatchingPrefetcher*
AccessMapPatternMatchingPrefetcherParams::create()
{
return new AccessMapPatternMatchingPrefetcher(this);
}

View File

@@ -0,0 +1,182 @@
/**
* Copyright (c) 2018 Metempsy Technology Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Javier Bueno
*/
/**
* Implementation of the Access Map Pattern Matching Prefetcher
*
* References:
* Access map pattern matching for high performance data cache prefetch.
* Ishii, Y., Inaba, M., & Hiraki, K. (2011).
* Journal of Instruction-Level Parallelism, 13, 1-24.
*/
#ifndef __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__
#define __MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__
#include "mem/cache/base.hh"
#include "mem/cache/prefetch/associative_set.hh"
#include "mem/cache/prefetch/queued.hh"
#include "mem/packet.hh"
struct AccessMapPatternMatchingPrefetcherParams;
class AccessMapPatternMatchingPrefetcher : public QueuedPrefetcher
{
/** Maximum number of prefetch generated */
const unsigned startDegree;
/** Amount of memory covered by a hot zone */
const uint64_t hotZoneSize;
/** A prefetch coverage factor bigger than this is considered high */
const double highCoverageThreshold;
/** A prefetch coverage factor smaller than this is considered low */
const double lowCoverageThreshold;
/** A prefetch accuracy factor bigger than this is considered high */
const double highAccuracyThreshold;
/** A prefetch accuracy factor smaller than this is considered low */
const double lowAccuracyThreshold;
/** A cache hit ratio bigger than this is considered high */
const double highCacheHitThreshold;
/** A cache hit ratio smaller than this is considered low */
const double lowCacheHitThreshold;
/** Cycles in an epoch period */
const Cycles epochCycles;
/** Off chip memory latency to use for the epoch bandwidth calculation */
const Tick offChipMemoryLatency;
/** Data type representing the state of a cacheline in the access map */
enum AccessMapState
{
AM_INIT,
AM_PREFETCH,
AM_ACCESS,
AM_INVALID
};
/** AccessMapEntry data type */
struct AccessMapEntry : public TaggedEntry
{
/** vector containing the state of the cachelines in this zone */
std::vector<AccessMapState> states;
AccessMapEntry(size_t num_entries) : states(num_entries, AM_INIT)
{}
/** Reset the entries to their initial values */
void reset() override
{
for (auto &entry : states) {
entry = AM_INIT;
}
}
};
/** Access map table */
AssociativeSet<AccessMapEntry> accessMapTable;
/**
* Number of good prefetches
* - State transitions from PREFETCH to ACCESS
*/
uint64_t numGoodPrefetches;
/**
* Number of prefetches issued
* - State transitions from INIT to PREFETCH
*/
uint64_t numTotalPrefetches;
/**
* Number of raw cache misses
* - State transitions from INIT or PREFETCH to ACCESS
*/
uint64_t numRawCacheMisses;
/**
* Number of raw cache hits
* - State transitions from ACCESS to ACCESS
*/
uint64_t numRawCacheHits;
/** Current degree */
unsigned degree;
/** Current useful degree */
unsigned usefulDegree;
/**
* Given a target cacheline, this function checks if the cachelines
* that follow the provided stride have been accessed. If so, the line
* is considered a good candidate.
* @param states vector containing the states of three contiguous hot zones
* @param current target block (cacheline)
* @param stride access stride to obtain the reference cachelines
* @return true if current is a prefetch candidate
*/
inline bool checkCandidate(std::vector<AccessMapState> const &states,
Addr current, int stride) const
{
enum AccessMapState tgt = states[current - stride];
enum AccessMapState s = states[current + stride];
enum AccessMapState s2 = states[current + 2 * stride];
enum AccessMapState s2_p1 = states[current + 2 * stride + 1];
return (tgt != AM_INVALID &&
((s == AM_ACCESS && s2 == AM_ACCESS) ||
(s == AM_ACCESS && s2_p1 == AM_ACCESS)));
}
/**
* Obtain an AccessMapEntry from the AccessMapTable, if the entry is not
* found a new one is initialized and inserted.
* @param am_addr address of the hot zone
* @param is_secure whether the address belongs to the secure memory area
* @return the corresponding entry
*/
AccessMapEntry *getAccessMapEntry(Addr am_addr, bool is_secure);
/**
* Updates the state of a block within an AccessMapEntry, also updates
* the prefetcher metrics.
* @param entry AccessMapEntry to update
* @param block cacheline within the hot zone
* @param state new state
*/
void setEntryState(AccessMapEntry &entry, Addr block,
enum AccessMapState state);
/**
* This event constitues the epoch of the statistics that keep track of
* the prefetcher accuracy, when this event triggers, the prefetcher degree
* is adjusted and the statistics counters are reset.
*/
void processEpochEvent();
EventFunctionWrapper epochEvent;
public:
AccessMapPatternMatchingPrefetcher(
const AccessMapPatternMatchingPrefetcherParams* p);
~AccessMapPatternMatchingPrefetcher() {}
void calculatePrefetch(const PrefetchInfo &pfi,
std::vector<AddrPriority> &addresses) override;
};
#endif//__MEM_CACHE_PREFETCH_ACCESS_MAP_PATTERN_MATCHING_HH__