From 9057eeabec7304df714f48e0df3641ddf60910d2 Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hn@hnpl.org>
Date: Tue, 19 Sep 2023 12:54:18 -0700
Subject: [PATCH 1/3] cpu: Explicitly define cache_line_size -> 64-bit unsigned
 int

While it makes sense to define the cache_line_size as a 32-bit unsigned int,
the use of cache_line_size is way out of its original scope.

cache_line_size has been used to produce an address mask, which masking out
the offset bits from an address. For example, [1], [2], [3], and [4].
However, since the cache_line_size is an "unsigned int", the type of the
value is not guaranteed to be 64-bit long. Subsequently, the
bit twiddling hacks in [1], [2], [3], and [4] produce 32-bit mask,
i.e., 0x00000000FFFFFFC0.

This behavior at least caused a problem in LLSC in RISC-V [5], where the
load reservation (LR) relies on the mask to produce the cache block address.
Two distinct 64-bit addresses can be mapped to the same cache block using
the above mask.

This patch explicitly defines cache_line_size as a 64-bit unsigned int so
the cache block mask can be produced correctly for 64-bit addresses.

[1] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/simple/atomic.hh#L147
[2] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/simple/timing.hh#L224
[3] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/o3/lsq_unit.cc#L241
[4] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/minor/lsq.cc#L1425
[5] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/arch/riscv/isa.cc#L787

Change-Id: I29abc7aaab266a37326846bbf7a82219071c4ffe
Signed-off-by: Hoa Nguyen <hn@hnpl.org>
---
 src/cpu/base.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 3976b66fe4..a05102a107 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
     bool _switchedOut;
 
     /** Cache the cache line size that we get from the system */
-    const unsigned int _cacheLineSize;
+    const uint64_t _cacheLineSize;
 
     /** Global CPU statistics that are merged into the Root object. */
     struct GlobalStats : public statistics::Group
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
     /**
      * Get the cache line size of the system.
      */
-    inline unsigned int cacheLineSize() const { return _cacheLineSize; }
+    inline uint64_t cacheLineSize() const { return _cacheLineSize; }
 
     /**
      * Serialize this object to the given output stream.

From ac5280fedc9ed72382534e4784377d97dfbeaaa1 Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hn@hnpl.org>
Date: Wed, 20 Sep 2023 13:49:10 -0700
Subject: [PATCH 2/3] mem,sim: Change the type of cache_line_size to Addr

Change-Id: Id39e8249fef89c0d59bb39f8104650257ff00245
Signed-off-by: Hoa Nguyen <hn@hnpl.org>
---
 src/mem/port_proxy.cc | 4 ++--
 src/mem/port_proxy.hh | 8 ++++----
 src/sim/system.hh     | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mem/port_proxy.cc b/src/mem/port_proxy.cc
index 44e8fbbc90..a3c82452e2 100644
--- a/src/mem/port_proxy.cc
+++ b/src/mem/port_proxy.cc
@@ -44,12 +44,12 @@
 namespace gem5
 {
 
-PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) :
+PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) :
     PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); },
         cache_line_size)
 {}
 
-PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) :
+PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) :
     PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); },
         cache_line_size)
 {}
diff --git a/src/mem/port_proxy.hh b/src/mem/port_proxy.hh
index 2e60a527cd..49c6d6f811 100644
--- a/src/mem/port_proxy.hh
+++ b/src/mem/port_proxy.hh
@@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol
     SendFunctionalFunc sendFunctional;
 
     /** Granularity of any transactions issued through this proxy. */
-    const unsigned int _cacheLineSize;
+    const Addr _cacheLineSize;
 
     void
     recvFunctionalSnoop(PacketPtr pkt) override
@@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol
     }
 
   public:
-    PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) :
+    PortProxy(SendFunctionalFunc func, Addr cache_line_size) :
         sendFunctional(func), _cacheLineSize(cache_line_size)
     {}
 
     // Helpers which create typical SendFunctionalFunc-s from other objects.
-    PortProxy(ThreadContext *tc, unsigned int cache_line_size);
-    PortProxy(const RequestPort &port, unsigned int cache_line_size);
+    PortProxy(ThreadContext *tc, Addr cache_line_size);
+    PortProxy(const RequestPort &port, Addr cache_line_size);
 
     virtual ~PortProxy() {}
 
diff --git a/src/sim/system.hh b/src/sim/system.hh
index d2725c32a9..bb64f639b5 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope
     /**
      * Get the cache line size of the system.
      */
-    unsigned int cacheLineSize() const { return _cacheLineSize; }
+    Addr cacheLineSize() const { return _cacheLineSize; }
 
     Threads threads;
 
@@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope
 
     enums::MemoryMode memoryMode;
 
-    const unsigned int _cacheLineSize;
+    const Addr _cacheLineSize;
 
     uint64_t workItemsBegin = 0;
     uint64_t workItemsEnd = 0;

From 1fc89bc8aeb1b2a11f89372093d2107873651e3f Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hn@hnpl.org>
Date: Wed, 20 Sep 2023 14:16:46 -0700
Subject: [PATCH 3/3] cpu,mem,dev: Use Addr for cacheLineSize

Change-Id: I2f056571dbf35081d58afda09726c600141d5a05
Signed-off-by: Hoa Nguyen <hn@hnpl.org>
---
 src/cpu/base.cc                         | 8 ++++----
 src/cpu/base.hh                         | 4 ++--
 src/cpu/minor/fetch1.hh                 | 4 ++--
 src/cpu/minor/lsq.hh                    | 2 +-
 src/cpu/o3/fetch.hh                     | 2 +-
 src/cpu/testers/memtest/memtest.hh      | 2 +-
 src/cpu/trace/trace_cpu.cc              | 2 +-
 src/cpu/trace/trace_cpu.hh              | 2 +-
 src/dev/dma_device.hh                   | 6 +++---
 src/learning_gem5/part2/simple_cache.hh | 2 +-
 src/mem/snoop_filter.hh                 | 2 +-
 11 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index a61c99796c..bea21a1928 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
     AddressMonitor &monitor = addressMonitor[tid];
 
     if (!monitor.gotWakeup) {
-        int block_size = cacheLineSize();
-        uint64_t mask = ~((uint64_t)(block_size - 1));
+        Addr block_size = cacheLineSize();
+        Addr mask = ~(block_size - 1);
 
         assert(pkt->req->hasPaddr());
         monitor.pAddr = pkt->getAddr() & mask;
@@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu)
     RequestPtr req = std::make_shared<Request>();
 
     Addr addr = monitor.vAddr;
-    int block_size = cacheLineSize();
-    uint64_t mask = ~((uint64_t)(block_size - 1));
+    Addr block_size = cacheLineSize();
+    Addr mask = ~(block_size - 1);
     int size = block_size;
 
     //The address of the next line if it crosses a cache line boundary.
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index a05102a107..a6c80dadbe 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
     bool _switchedOut;
 
     /** Cache the cache line size that we get from the system */
-    const uint64_t _cacheLineSize;
+    const Addr _cacheLineSize;
 
     /** Global CPU statistics that are merged into the Root object. */
     struct GlobalStats : public statistics::Group
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
     /**
      * Get the cache line size of the system.
      */
-    inline uint64_t cacheLineSize() const { return _cacheLineSize; }
+    inline Addr cacheLineSize() const { return _cacheLineSize; }
 
     /**
      * Serialize this object to the given output stream.
diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh
index f6a796ce82..b65bb70d7b 100644
--- a/src/cpu/minor/fetch1.hh
+++ b/src/cpu/minor/fetch1.hh
@@ -213,13 +213,13 @@ class Fetch1 : public Named
     /** Line snap size in bytes.  All fetches clip to make their ends not
      *  extend beyond this limit.  Setting this to the machine L1 cache line
      *  length will result in fetches never crossing line boundaries. */
-    unsigned int lineSnap;
+    Addr lineSnap;
 
     /** Maximum fetch width in bytes.  Setting this (and lineSnap) to the
      *  machine L1 cache line length will result in fetches of whole cache
      *  lines.  Setting this to sizeof(MachInst) will result it fetches of
      *  single instructions (except near the end of lineSnap lines) */
-    unsigned int maxLineWidth;
+    Addr maxLineWidth;
 
     /** Maximum number of fetches allowed in flight (in queues or memory) */
     unsigned int fetchLimit;
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 4d7c351e7a..e30a615803 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -548,7 +548,7 @@ class LSQ : public Named
     const unsigned int inMemorySystemLimit;
 
     /** Memory system access width (and snap) in bytes */
-    const unsigned int lineWidth;
+    const Addr lineWidth;
 
   public:
     /** The LSQ consists of three queues: requests, transfers and the
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 6add31444d..2c6da6708a 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -470,7 +470,7 @@ class Fetch
     ThreadID retryTid;
 
     /** Cache block size. */
-    unsigned int cacheBlkSize;
+    Addr cacheBlkSize;
 
     /** The size of the fetch buffer in bytes. The fetch buffer
      *  itself may be smaller than a cache line.
diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh
index 3fd1674191..1aa170b2db 100644
--- a/src/cpu/testers/memtest/memtest.hh
+++ b/src/cpu/testers/memtest/memtest.hh
@@ -142,7 +142,7 @@ class MemTest : public ClockedObject
     // store the expected value for the addresses we have touched
     std::unordered_map<Addr, uint8_t> referenceData;
 
-    const unsigned blockSize;
+    const Addr blockSize;
 
     const Addr blockAddrMask;
 
diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc
index d5c6b4fb3c..336a13beda 100644
--- a/src/cpu/trace/trace_cpu.cc
+++ b/src/cpu/trace/trace_cpu.cc
@@ -585,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
     // stat counting this is useful to keep a check on how frequently this
     // happens. If required the code could be revised to mimick splitting such
     // a request into two.
-    unsigned blk_size = owner.cacheLineSize;
+    Addr blk_size = owner.cacheLineSize;
     Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
     if (!(blk_offset + node_ptr->size <= blk_size)) {
         node_ptr->size = blk_size - blk_offset;
diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh
index d54b3c4227..cd32230aca 100644
--- a/src/cpu/trace/trace_cpu.hh
+++ b/src/cpu/trace/trace_cpu.hh
@@ -286,7 +286,7 @@ class TraceCPU : public ClockedObject
     };
 
     /** Cache the cache line size that we get from the system */
-    const unsigned int cacheLineSize;
+    const Addr cacheLineSize;
 
     /** Port to connect to L1 instruction cache. */
     IcachePort icachePort;
diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh
index 92b44bf5f6..3fd77860f4 100644
--- a/src/dev/dma_device.hh
+++ b/src/dev/dma_device.hh
@@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable
     /** Default substreamId */
     const uint32_t defaultSSid;
 
-    const int cacheLineSize;
+    const Addr cacheLineSize;
 
   protected:
 
@@ -257,7 +257,7 @@ class DmaDevice : public PioDevice
 
     void init() override;
 
-    unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
+    Addr cacheBlockSize() const { return sys->cacheLineSize(); }
 
     Port &getPort(const std::string &if_name,
                   PortID idx=InvalidPortID) override;
@@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable
 
     DmaPort &port;
 
-    const int cacheLineSize;
+    const Addr cacheLineSize;
 
   private:
     class DmaDoneEvent : public Event
diff --git a/src/learning_gem5/part2/simple_cache.hh b/src/learning_gem5/part2/simple_cache.hh
index 25d195d4f1..1ca87dd126 100644
--- a/src/learning_gem5/part2/simple_cache.hh
+++ b/src/learning_gem5/part2/simple_cache.hh
@@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject
     const Cycles latency;
 
     /// The block size for the cache
-    const unsigned blockSize;
+    const Addr blockSize;
 
     /// Number of blocks in the cache (size of cache / block size)
     const unsigned capacity;
diff --git a/src/mem/snoop_filter.hh b/src/mem/snoop_filter.hh
index 7d4a222874..23cf77fcdd 100644
--- a/src/mem/snoop_filter.hh
+++ b/src/mem/snoop_filter.hh
@@ -302,7 +302,7 @@ class SnoopFilter : public SimObject
     /** Track the mapping from port ids to the local mask ids. */
     std::vector<PortID> localResponsePortIds;
     /** Cache line size. */
-    const unsigned linesize;
+    const Addr linesize;
     /** Latency for doing a lookup in the filter */
     const Cycles lookupLatency;
     /** Max capacity in terms of cache blocks tracked, for sanity checking */