mem-cache: Prefetchers Improvements (#564)

This pull request contains a set of small patches which fix some bugs in
the gem5 prefetchers and align out-of-the-box prefetcher performance
more closely with what a typical user would expect.

The performance patches have been tested with an out-of-the-box
(untuned) Stride prefetcher configuration against a set of SPEC 2017
SimPoints, and show a modest IPC uplift across the board, with no IPC
degradation.

The new defaults were identified as part of work on gem5 prefetchers
undertaken by Nikolaos Kyparissas while on internship at Arm.
This commit is contained in:
Jason Lowe-Power
2023-11-16 15:22:26 -08:00
committed by GitHub
6 changed files with 27 additions and 16 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2012-2013, 2015, 2018 ARM Limited
# Copyright (c) 2012-2013, 2015, 2018, 2022 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -112,7 +112,7 @@ class BaseCache(ClockedObject):
"Notify the hardware prefetcher on every access (not just misses)",
)
prefetch_on_pf_hit = Param.Bool(
False, "Notify the hardware prefetcher on hit on prefetched lines"
True, "Notify the hardware prefetcher on hit on prefetched lines"
)
tags = Param.BaseTags(BaseSetAssoc(), "Tag store")

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2012, 2014, 2019 ARM Limited
# Copyright (c) 2012, 2014, 2019, 2022 Arm Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -192,6 +192,13 @@ class StridePrefetcher(QueuedPrefetcher):
use_requestor_id = Param.Bool(True, "Use requestor id based history")
degree = Param.Int(4, "Number of prefetches to generate")
distance = Param.Unsigned(
0,
"How far ahead of the demand stream to start prefetching. "
"Skip this number of strides ahead of the first identified prefetch, "
"then generate `degree` prefetches at `stride` intervals. "
"A value of zero indicates no skip.",
)
table_assoc = Param.Int(4, "Associativity of the PC table")
table_entries = Param.MemorySize("64", "Number of entries of the PC table")

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014 ARM Limited
* Copyright (c) 2013-2014, 2022 Arm Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@@ -245,6 +245,7 @@ Base::probeNotify(const PacketPtr &pkt, bool miss)
// operations or for writes that we are coalescing.
if (pkt->cmd.isSWPrefetch()) return;
if (pkt->req->isCacheMaintenance()) return;
if (pkt->isCleanEviction()) return;
if (pkt->isWrite() && cache != nullptr && cache->coalesce()) return;
if (!pkt->req->hasPaddr()) {
panic("Request must have a physical address");

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2015 ARM Limited
* Copyright (c) 2014-2015, 2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -178,7 +178,7 @@ Queued::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
if (queueSquash) {
auto itr = pfq.begin();
while (itr != pfq.end()) {
if (itr->pfInfo.getAddr() == blk_addr &&
if (blockAddress(itr->pfInfo.getAddr()) == blk_addr &&
itr->pfInfo.isSecure() == is_secure) {
DPRINTF(HWPrefetch, "Removing pf candidate addr: %#x "
"(cl: %#x), demand request going to the same addr\n",

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018 Inria
* Copyright (c) 2012-2013, 2015 ARM Limited
* Copyright (c) 2012-2013, 2015, 2022-2023 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -84,6 +84,7 @@ Stride::Stride(const StridePrefetcherParams &p)
threshConf(p.confidence_threshold/100.0),
useRequestorId(p.use_requestor_id),
degree(p.degree),
distance(p.distance),
pcTableInfo(p.table_assoc, p.table_entries, p.table_indexing_policy,
p.table_replacement_policy)
{
@@ -167,16 +168,16 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi,
return;
}
// Round strides up to at least 1 cacheline
int prefetch_stride = new_stride;
if (abs(new_stride) < blkSize) {
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
}
Addr new_addr = pf_addr + distance * prefetch_stride;
// Generate up to degree prefetches
for (int d = 1; d <= degree; d++) {
// Round strides up to at least 1 cacheline
int prefetch_stride = new_stride;
if (abs(new_stride) < blkSize) {
prefetch_stride = (new_stride < 0) ? -blkSize : blkSize;
}
Addr new_addr = pf_addr + d * prefetch_stride;
addresses.push_back(AddrPriority(new_addr, 0));
addresses.push_back(AddrPriority(new_addr += prefetch_stride, 0));
}
} else {
// Miss in table

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2018 Inria
* Copyright (c) 2012-2013, 2015 ARM Limited
* Copyright (c) 2012-2013, 2015, 2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -105,6 +105,8 @@ class Stride : public Queued
const int degree;
const int distance;
/**
* Information used to create a new PC table. All of them behave equally.
*/