From 8598764a0345ec4326061d94f917597b65e25c8f Mon Sep 17 00:00:00 2001 From: Richard Cooper Date: Mon, 14 Nov 2022 12:02:10 +0000 Subject: [PATCH 1/5] mem-cache: Squash prefetch queue entries by block address. Prefetch queue entries were being squashed by comparing the address of each queued prefetch against the block address of the demand access. Only prefetches that happen to fall on a cache-line block boundary would be squashed. This patch converts the prefetch addresses to block addresses before comparison. Change-Id: I55ecb4919e94ad314b91c7795bba257c550b1528 Reviewed-by: Giacomo Travaglini --- src/mem/cache/prefetch/queued.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc index 1ab34d2e9b..c67c315dad 100644 --- a/src/mem/cache/prefetch/queued.cc +++ b/src/mem/cache/prefetch/queued.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 ARM Limited + * Copyright (c) 2014-2015, 2022 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -178,7 +178,7 @@ Queued::notify(const PacketPtr &pkt, const PrefetchInfo &pfi) if (queueSquash) { auto itr = pfq.begin(); while (itr != pfq.end()) { - if (itr->pfInfo.getAddr() == blk_addr && + if (blockAddress(itr->pfInfo.getAddr()) == blk_addr && itr->pfInfo.isSecure() == is_secure) { DPRINTF(HWPrefetch, "Removing pf candidate addr: %#x " "(cl: %#x), demand request going to the same addr\n", From 6416304e07a6bcbcf338d20f8852923f050afa70 Mon Sep 17 00:00:00 2001 From: Richard Cooper Date: Mon, 14 Nov 2022 10:56:36 +0000 Subject: [PATCH 2/5] mem-cache: Update default prefetch options. Update the default prefetch options to achieve out-of-the box prefetcher performance closer to that which a typical user would expect. Configurations that set these parameters explicitly will be unaffected. The new defaults were identified as part of work on gem5 prefetchers undertaken by Nikolaos Kyparissas while on internship at Arm. Change-Id: Id63868c7c8f00ee15a0b09a6550780a45ae67e55 Reviewed-by: Giacomo Travaglini --- src/mem/cache/Cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py index d853a08cd9..501bb92682 100644 --- a/src/mem/cache/Cache.py +++ b/src/mem/cache/Cache.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013, 2015, 2018 ARM Limited +# Copyright (c) 2012-2013, 2015, 2018, 2022 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -112,7 +112,7 @@ class BaseCache(ClockedObject): "Notify the hardware prefetcher on every access (not just misses)", ) prefetch_on_pf_hit = Param.Bool( - False, "Notify the hardware prefetcher on hit on prefetched lines" + True, "Notify the hardware prefetcher on hit on prefetched lines" ) tags = Param.BaseTags(BaseSetAssoc(), "Tag store") From 38045d7a252c46f5f57692e5eff5b18dd4fd2547 Mon Sep 17 00:00:00 2001 From: Nikolaos Kyparissas Date: Tue, 30 Aug 2022 17:14:03 +0100 Subject: [PATCH 3/5] mem-cache: Added clean eviction check for prefetchers. pkt->req->isCacheMaintenance() would not include a check for clean eviction before notifying the prefetcher, causing gem5 to crash. Change-Id: I1d082a87a3908b1ed46c5d632d45d8b09950b382 Reviewed-by: Giacomo Travaglini --- src/mem/cache/prefetch/base.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc index e3e4b24cf2..25c37df323 100644 --- a/src/mem/cache/prefetch/base.cc +++ b/src/mem/cache/prefetch/base.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 ARM Limited + * Copyright (c) 2013-2014, 2022 Arm Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -245,6 +245,7 @@ Base::probeNotify(const PacketPtr &pkt, bool miss) // operations or for writes that we are coaslescing. if (pkt->cmd.isSWPrefetch()) return; if (pkt->req->isCacheMaintenance()) return; + if (pkt->isCleanEviction()) return; if (pkt->isWrite() && cache != nullptr && cache->coalesce()) return; if (!pkt->req->hasPaddr()) { panic("Request must have a physical address"); From 2abd65c2707695dad735c4e383b1ce7c61158314 Mon Sep 17 00:00:00 2001 From: Nikolaos Kyparissas Date: Tue, 30 Aug 2022 17:04:21 +0100 Subject: [PATCH 4/5] mem: added distance parameter to stride prefetcher The Stride Prefetcher will skip this number of strides ahead of the first identified prefetch, then generate `degree` prefetches at `stride` intervals. A value of zero indicates no skip (i.e. start prefetching from the next identified prefetch address). This parameter can be used to increase the timeliness of prefetches by starting to prefetch far enough ahead of the demand stream to cover the memory system latency. [Richard Cooper : - Added detail to commit comment and `distance` Param documentation. - Changed `distance` Param from `Param.Int` to `Param.Unsigned`. ] Change-Id: I6c4e744079b53a7b804d8eab93b0f07b566f0c08 Reviewed-by: Giacomo Travaglini Signed-off-by: Richard Cooper --- src/mem/cache/prefetch/Prefetcher.py | 9 ++++++++- src/mem/cache/prefetch/stride.cc | 6 ++++-- src/mem/cache/prefetch/stride.hh | 4 +++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index ecc67f4857..c15ef6539c 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012, 2014, 2019 ARM Limited +# Copyright (c) 2012, 2014, 2019, 2022 Arm Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -192,6 +192,13 @@ class StridePrefetcher(QueuedPrefetcher): use_requestor_id = Param.Bool(True, "Use requestor id based history") degree = Param.Int(4, "Number of prefetches to generate") + distance = Param.Unsigned( + 0, + "How far ahead of the demand stream to start prefetching. " + "Skip this number of strides ahead of the first identified prefetch, " + "then generate `degree` prefetches at `stride` intervals. " + "A value of zero indicates no skip.", + ) table_assoc = Param.Int(4, "Associativity of the PC table") table_entries = Param.MemorySize("64", "Number of entries of the PC table") diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index 0a77b28a1c..d21fcd160c 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -1,6 +1,6 @@ /* * Copyright (c) 2018 Inria - * Copyright (c) 2012-2013, 2015 ARM Limited + * Copyright (c) 2012-2013, 2015, 2022 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -84,6 +84,7 @@ Stride::Stride(const StridePrefetcherParams &p) threshConf(p.confidence_threshold/100.0), useRequestorId(p.use_requestor_id), degree(p.degree), + distance(p.distance), pcTableInfo(p.table_assoc, p.table_entries, p.table_indexing_policy, p.table_replacement_policy) { @@ -175,7 +176,8 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi, prefetch_stride = (new_stride < 0) ? -blkSize : blkSize; } - Addr new_addr = pf_addr + d * prefetch_stride; + Addr new_addr = pf_addr + distance * prefetch_stride + + d * prefetch_stride; addresses.push_back(AddrPriority(new_addr, 0)); } } else { diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh index 7e55abea21..35ba4eed4e 100644 --- a/src/mem/cache/prefetch/stride.hh +++ b/src/mem/cache/prefetch/stride.hh @@ -1,6 +1,6 @@ /* * Copyright (c) 2018 Inria - * Copyright (c) 2012-2013, 2015 ARM Limited + * Copyright (c) 2012-2013, 2015, 2022 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -105,6 +105,8 @@ class Stride : public Queued const int degree; + const int distance; + /** * Information used to create a new PC table. All of them behave equally. */ From 047a494c2be9ef2b1a6d493d60a152e53ce92171 Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Thu, 9 Nov 2023 16:57:01 +0000 Subject: [PATCH 5/5] mem-cache: Optimize strided prefetcher address generation This commit optimizes the address generation logic in the strided prefetcher by introducing the following changes (d is the degree of the prefetcher) * Evaluate the fixed prefetch_stride only once (and not d-times) * Replace 2d multiplications (d * prefetch_stride and distance * prefetch_stride) with additions by updating the new base prefetch address while looping Change-Id: I49c52333fc4c7071ac3d73443f2ae07bfcd5b8e4 Signed-off-by: Giacomo Travaglini Reviewed-by: Richard Cooper Reviewed-by: Tiberiu Bucur --- src/mem/cache/prefetch/stride.cc | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc index d21fcd160c..4b709400c5 100644 --- a/src/mem/cache/prefetch/stride.cc +++ b/src/mem/cache/prefetch/stride.cc @@ -1,6 +1,6 @@ /* * Copyright (c) 2018 Inria - * Copyright (c) 2012-2013, 2015, 2022 Arm Limited + * Copyright (c) 2012-2013, 2015, 2022-2023 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -168,17 +168,16 @@ Stride::calculatePrefetch(const PrefetchInfo &pfi, return; } + // Round strides up to atleast 1 cacheline + int prefetch_stride = new_stride; + if (abs(new_stride) < blkSize) { + prefetch_stride = (new_stride < 0) ? -blkSize : blkSize; + } + + Addr new_addr = pf_addr + distance * prefetch_stride; // Generate up to degree prefetches for (int d = 1; d <= degree; d++) { - // Round strides up to atleast 1 cacheline - int prefetch_stride = new_stride; - if (abs(new_stride) < blkSize) { - prefetch_stride = (new_stride < 0) ? -blkSize : blkSize; - } - - Addr new_addr = pf_addr + distance * prefetch_stride - + d * prefetch_stride; - addresses.push_back(AddrPriority(new_addr, 0)); + addresses.push_back(AddrPriority(new_addr += prefetch_stride, 0)); } } else { // Miss in table