mem-cache: Prefetchers Improvements (#872)

This pull request contains a set of small patches which fix some bugs in
the gem5 prefetchers, and align out-of-the-box prefetcher performance
more closely with that which a typical user would expect.

The performance patches have been tested with an out-of-the-box
(untuned) Stride prefetcher configuration against a set of SPEC 2017
SimPoints, and show a small-to-modest IPC uplift across about half
the benchmarks, with no significant IPC degradation.

The new defaults were identified as part of work on gem5 prefetchers
undertaken by Nikolaos Kyparissas while on internship at Arm.

This PR is an updated version of PR #564, which was reverted due to Bug
#580. Bug #580 was fixed in PR #871. This PR updates #564 to the latest
state of the develop branch, and should be applied after PR #871.
This commit is contained in:
Giacomo Travaglini
2024-03-04 09:09:47 +00:00
committed by GitHub
5 changed files with 32 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2012, 2014, 2019, 2023 ARM Limited
# Copyright (c) 2012, 2014, 2019, 2022-2024 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -79,7 +79,7 @@ class BasePrefetcher(ClockedObject):
"Notify the hardware prefetcher on every access (not just misses)",
)
prefetch_on_pf_hit = Param.Bool(
False,
True,
"Notify the hardware prefetcher on hit on prefetched lines",
)
use_virtual_addresses = Param.Bool(
@@ -191,6 +191,13 @@ class StridePrefetcher(QueuedPrefetcher):
use_requestor_id = Param.Bool(True, "Use requestor id based history")
degree = Param.Int(4, "Number of prefetches to generate")
distance = Param.Unsigned(
0,
"How far ahead of the demand stream to start prefetching. "
"Skip this number of strides ahead of the first identified prefetch, "
"then generate `degree` prefetches at `stride` intervals. "
"A value of zero indicates no skip.",
)
table_assoc = Param.Int(4, "Associativity of the PC table")
table_entries = Param.MemorySize("64", "Number of entries of the PC table")

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014, 2023-2024 Arm Limited
* Copyright (c) 2013-2014, 2022-2024 Arm Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -238,6 +238,7 @@ Base::probeNotify(const CacheAccessProbeArg &acc, bool miss)
// operations or for writes that we are coalescing.
if (pkt->cmd.isSWPrefetch()) return;
if (pkt->req->isCacheMaintenance()) return;
if (pkt->isCleanEviction()) return;
if (pkt->isWrite() && cache.coalesce()) return;
if (!pkt->req->hasPaddr()) {
panic("Request must have a physical address");

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015, 2023 ARM Limited
* Copyright (c) 2014-2015, 2022-2023 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -180,7 +180,7 @@ Queued::notify(const CacheAccessProbeArg &acc, const PrefetchInfo &pfi)
if (queueSquash) {
auto itr = pfq.begin();
while (itr != pfq.end()) {
if (itr->pfInfo.getAddr() == blk_addr &&
if (blockAddress(itr->pfInfo.getAddr()) == blk_addr &&
itr->pfInfo.isSecure() == is_secure) {
DPRINTF(HWPrefetch, "Removing pf candidate addr: %#x "
"(cl: %#x), demand request going to the same addr\n",

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018 Inria
* Copyright (c) 2012-2013, 2015 ARM Limited
* Copyright (c) 2012-2013, 2015, 2022-2023 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -84,6 +84,7 @@ Stride::Stride(const StridePrefetcherParams &p)
threshConf(p.confidence_threshold/100.0),
useRequestorId(p.use_requestor_id),
degree(p.degree),
distance(p.distance),
pcTableInfo(p.table_assoc, p.table_entries, p.table_indexing_policy,
p.table_replacement_policy)
{
@@ -168,15 +169,16 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi,
return;
}
// Round strides up to atleast 1 cacheline
int prefetch_stride = new_stride;
if (abs(new_stride) < blkSize) {
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
}
Addr new_addr = pf_addr + distance * prefetch_stride;
// Generate up to degree prefetches
for (int d = 1; d <= degree; d++) {
// Round strides up to atleast 1 cacheline
int prefetch_stride = new_stride;
if (abs(new_stride) < blkSize) {
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
}
Addr new_addr = pf_addr + d * prefetch_stride;
new_addr += prefetch_stride;
addresses.push_back(AddrPriority(new_addr, 0));
}
} else {

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018 Inria
* Copyright (c) 2012-2013, 2015 ARM Limited
* Copyright (c) 2012-2013, 2015, 2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -105,6 +105,14 @@ class Stride : public Queued
const int degree;
/** How far ahead of the demand stream to start prefetching.
*
* Skip this number of strides ahead of the first identified
* prefetch, then generate `degree` prefetches at `stride`
* intervals. A value of zero indicates no skip.
*/
const int distance;
/**
* Information used to create a new PC table. All of them behave equally.
*/