mem-cache: Prefetchers Improvements (#872)
This pull request contains a set of small patches which fix some bugs in the gem5 prefetchers, and align out-of-the-box prefetcher performance more closely with that which a typical user would expect. The performance patches have been tested with an out-of-the-box (untuned) Stride prefetcher configuration against a set of SPEC 2017 SimPoints, and show a small-to-modest IPC uplift across about half of the benchmarks, with no significant IPC degradation. The new defaults were identified as part of work on gem5 prefetchers undertaken by Nikolaos Kyparissas while on internship at Arm. This PR is an updated version of PR #564, which was reverted due to Bug #580. Bug #580 was fixed in PR #871. This PR updates #564 to the latest state of the develop branch, and should be applied after PR #871.
This commit is contained in:
11
src/mem/cache/prefetch/Prefetcher.py
vendored
11
src/mem/cache/prefetch/Prefetcher.py
vendored
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2012, 2014, 2019, 2023 ARM Limited
|
||||
# Copyright (c) 2012, 2014, 2019, 2022-2024 Arm Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -79,7 +79,7 @@ class BasePrefetcher(ClockedObject):
|
||||
"Notify the hardware prefetcher on every access (not just misses)",
|
||||
)
|
||||
prefetch_on_pf_hit = Param.Bool(
|
||||
False,
|
||||
True,
|
||||
"Notify the hardware prefetcher on hit on prefetched lines",
|
||||
)
|
||||
use_virtual_addresses = Param.Bool(
|
||||
@@ -191,6 +191,13 @@ class StridePrefetcher(QueuedPrefetcher):
|
||||
use_requestor_id = Param.Bool(True, "Use requestor id based history")
|
||||
|
||||
degree = Param.Int(4, "Number of prefetches to generate")
|
||||
distance = Param.Unsigned(
|
||||
0,
|
||||
"How far ahead of the demand stream to start prefetching. "
|
||||
"Skip this number of strides ahead of the first identified prefetch, "
|
||||
"then generate `degree` prefetches at `stride` intervals. "
|
||||
"A value of zero indicates no skip.",
|
||||
)
|
||||
|
||||
table_assoc = Param.Int(4, "Associativity of the PC table")
|
||||
table_entries = Param.MemorySize("64", "Number of entries of the PC table")
|
||||
|
||||
3
src/mem/cache/prefetch/base.cc
vendored
3
src/mem/cache/prefetch/base.cc
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2014, 2023-2024 Arm Limited
|
||||
* Copyright (c) 2013-2014, 2022-2024 Arm Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -238,6 +238,7 @@ Base::probeNotify(const CacheAccessProbeArg &acc, bool miss)
|
||||
// operations or for writes that we are coalescing.
|
||||
if (pkt->cmd.isSWPrefetch()) return;
|
||||
if (pkt->req->isCacheMaintenance()) return;
|
||||
if (pkt->isCleanEviction()) return;
|
||||
if (pkt->isWrite() && cache.coalesce()) return;
|
||||
if (!pkt->req->hasPaddr()) {
|
||||
panic("Request must have a physical address");
|
||||
|
||||
4
src/mem/cache/prefetch/queued.cc
vendored
4
src/mem/cache/prefetch/queued.cc
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2015, 2023 ARM Limited
|
||||
* Copyright (c) 2014-2015, 2022-2023 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -180,7 +180,7 @@ Queued::notify(const CacheAccessProbeArg &acc, const PrefetchInfo &pfi)
|
||||
if (queueSquash) {
|
||||
auto itr = pfq.begin();
|
||||
while (itr != pfq.end()) {
|
||||
if (itr->pfInfo.getAddr() == blk_addr &&
|
||||
if (blockAddress(itr->pfInfo.getAddr()) == blk_addr &&
|
||||
itr->pfInfo.isSecure() == is_secure) {
|
||||
DPRINTF(HWPrefetch, "Removing pf candidate addr: %#x "
|
||||
"(cl: %#x), demand request going to the same addr\n",
|
||||
|
||||
18
src/mem/cache/prefetch/stride.cc
vendored
18
src/mem/cache/prefetch/stride.cc
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Inria
|
||||
* Copyright (c) 2012-2013, 2015 ARM Limited
|
||||
* Copyright (c) 2012-2013, 2015, 2022-2023 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -84,6 +84,7 @@ Stride::Stride(const StridePrefetcherParams &p)
|
||||
threshConf(p.confidence_threshold/100.0),
|
||||
useRequestorId(p.use_requestor_id),
|
||||
degree(p.degree),
|
||||
distance(p.distance),
|
||||
pcTableInfo(p.table_assoc, p.table_entries, p.table_indexing_policy,
|
||||
p.table_replacement_policy)
|
||||
{
|
||||
@@ -168,15 +169,16 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi,
|
||||
return;
|
||||
}
|
||||
|
||||
// Round strides up to at least 1 cacheline
|
||||
int prefetch_stride = new_stride;
|
||||
if (abs(new_stride) < blkSize) {
|
||||
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
|
||||
}
|
||||
|
||||
Addr new_addr = pf_addr + distance * prefetch_stride;
|
||||
// Generate up to degree prefetches
|
||||
for (int d = 1; d <= degree; d++) {
|
||||
// Round strides up to at least 1 cacheline
|
||||
int prefetch_stride = new_stride;
|
||||
if (abs(new_stride) < blkSize) {
|
||||
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
|
||||
}
|
||||
|
||||
Addr new_addr = pf_addr + d * prefetch_stride;
|
||||
new_addr += prefetch_stride;
|
||||
addresses.push_back(AddrPriority(new_addr, 0));
|
||||
}
|
||||
} else {
|
||||
|
||||
10
src/mem/cache/prefetch/stride.hh
vendored
10
src/mem/cache/prefetch/stride.hh
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018 Inria
|
||||
* Copyright (c) 2012-2013, 2015 ARM Limited
|
||||
* Copyright (c) 2012-2013, 2015, 2022 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -105,6 +105,14 @@ class Stride : public Queued
|
||||
|
||||
const int degree;
|
||||
|
||||
/** How far ahead of the demand stream to start prefetching.
|
||||
*
|
||||
* Skip this number of strides ahead of the first identified
|
||||
* prefetch, then generate `degree` prefetches at `stride`
|
||||
* intervals. A value of zero indicates no skip.
|
||||
*/
|
||||
const int distance;
|
||||
|
||||
/**
|
||||
* Information used to create a new PC table. All of them behave equally.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user