From 9057eeabec7304df714f48e0df3641ddf60910d2 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Tue, 19 Sep 2023 12:54:18 -0700 Subject: [PATCH 1/3] cpu: Explicitly define cache_line_size -> 64-bit unsigned int While it makes sense to define the cache_line_size as a 32-bit unsigned int, the use of cache_line_size is way out of its original scope. cache_line_size has been used to produce an address mask, which masking out the offset bits from an address. For example, [1], [2], [3], and [4]. However, since the cache_line_size is an "unsigned int", the type of the value is not guaranteed to be 64-bit long. Subsequently, the bit twiddling hacks in [1], [2], [3], and [4] produce 32-bit mask, i.e., 0x00000000FFFFFFC0. This behavior at least caused a problem in LLSC in RISC-V [5], where the load reservation (LR) relies on the mask to produce the cache block address. Two distinct 64-bit addresses can be mapped to the same cache block using the above mask. This patch explicitly defines cache_line_size as a 64-bit unsigned int so the cache block mask can be produced correctly for 64-bit addresses. [1] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/simple/atomic.hh#L147 [2] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/simple/timing.hh#L224 [3] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/o3/lsq_unit.cc#L241 [4] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/cpu/minor/lsq.cc#L1425 [5] https://github.com/gem5/gem5/blob/3bdcfd6f7abdf0c9125ec37018df76a29998a055/src/arch/riscv/isa.cc#L787 Change-Id: I29abc7aaab266a37326846bbf7a82219071c4ffe Signed-off-by: Hoa Nguyen --- src/cpu/base.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpu/base.hh b/src/cpu/base.hh index 3976b66fe4..a05102a107 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject bool _switchedOut; /** Cache the cache line size that we get from the system */ - const unsigned int _cacheLineSize; + const uint64_t _cacheLineSize; /** Global CPU statistics that are merged into the Root object. */ struct GlobalStats : public statistics::Group @@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject /** * Get the cache line size of the system. */ - inline unsigned int cacheLineSize() const { return _cacheLineSize; } + inline uint64_t cacheLineSize() const { return _cacheLineSize; } /** * Serialize this object to the given output stream. From ac5280fedc9ed72382534e4784377d97dfbeaaa1 Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Wed, 20 Sep 2023 13:49:10 -0700 Subject: [PATCH 2/3] mem,sim: Change the type of cache_line_size to Addr Change-Id: Id39e8249fef89c0d59bb39f8104650257ff00245 Signed-off-by: Hoa Nguyen --- src/mem/port_proxy.cc | 4 ++-- src/mem/port_proxy.hh | 8 ++++---- src/sim/system.hh | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mem/port_proxy.cc b/src/mem/port_proxy.cc index 44e8fbbc90..a3c82452e2 100644 --- a/src/mem/port_proxy.cc +++ b/src/mem/port_proxy.cc @@ -44,12 +44,12 @@ namespace gem5 { -PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) : +PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) : PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); }, cache_line_size) {} -PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) : +PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) : PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); }, cache_line_size) {} diff --git a/src/mem/port_proxy.hh b/src/mem/port_proxy.hh index 2e60a527cd..49c6d6f811 100644 --- a/src/mem/port_proxy.hh +++ b/src/mem/port_proxy.hh @@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol SendFunctionalFunc sendFunctional; /** Granularity of any transactions issued through this proxy. */ - const unsigned int _cacheLineSize; + const Addr _cacheLineSize; void recvFunctionalSnoop(PacketPtr pkt) override @@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol } public: - PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) : + PortProxy(SendFunctionalFunc func, Addr cache_line_size) : sendFunctional(func), _cacheLineSize(cache_line_size) {} // Helpers which create typical SendFunctionalFunc-s from other objects. - PortProxy(ThreadContext *tc, unsigned int cache_line_size); - PortProxy(const RequestPort &port, unsigned int cache_line_size); + PortProxy(ThreadContext *tc, Addr cache_line_size); + PortProxy(const RequestPort &port, Addr cache_line_size); virtual ~PortProxy() {} diff --git a/src/sim/system.hh b/src/sim/system.hh index d2725c32a9..bb64f639b5 100644 --- a/src/sim/system.hh +++ b/src/sim/system.hh @@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope /** * Get the cache line size of the system. */ - unsigned int cacheLineSize() const { return _cacheLineSize; } + Addr cacheLineSize() const { return _cacheLineSize; } Threads threads; @@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope enums::MemoryMode memoryMode; - const unsigned int _cacheLineSize; + const Addr _cacheLineSize; uint64_t workItemsBegin = 0; uint64_t workItemsEnd = 0; From 1fc89bc8aeb1b2a11f89372093d2107873651e3f Mon Sep 17 00:00:00 2001 From: Hoa Nguyen Date: Wed, 20 Sep 2023 14:16:46 -0700 Subject: [PATCH 3/3] cpu,mem,dev: Use Addr for cacheLineSize Change-Id: I2f056571dbf35081d58afda09726c600141d5a05 Signed-off-by: Hoa Nguyen --- src/cpu/base.cc | 8 ++++---- src/cpu/base.hh | 4 ++-- src/cpu/minor/fetch1.hh | 4 ++-- src/cpu/minor/lsq.hh | 2 +- src/cpu/o3/fetch.hh | 2 +- src/cpu/testers/memtest/memtest.hh | 2 +- src/cpu/trace/trace_cpu.cc | 2 +- src/cpu/trace/trace_cpu.hh | 2 +- src/dev/dma_device.hh | 6 +++--- src/learning_gem5/part2/simple_cache.hh | 2 +- src/mem/snoop_filter.hh | 2 +- 11 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/cpu/base.cc b/src/cpu/base.cc index a61c99796c..bea21a1928 100644 --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt) AddressMonitor &monitor = addressMonitor[tid]; if (!monitor.gotWakeup) { - int block_size = cacheLineSize(); - uint64_t mask = ~((uint64_t)(block_size - 1)); + Addr block_size = cacheLineSize(); + Addr mask = ~(block_size - 1); assert(pkt->req->hasPaddr()); monitor.pAddr = pkt->getAddr() & mask; @@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu) RequestPtr req = std::make_shared(); Addr addr = monitor.vAddr; - int block_size = cacheLineSize(); - uint64_t mask = ~((uint64_t)(block_size - 1)); + Addr block_size = cacheLineSize(); + Addr mask = ~(block_size - 1); int size = block_size; //The address of the next line if it crosses a cache line boundary. diff --git a/src/cpu/base.hh b/src/cpu/base.hh index a05102a107..a6c80dadbe 100644 --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject bool _switchedOut; /** Cache the cache line size that we get from the system */ - const uint64_t _cacheLineSize; + const Addr _cacheLineSize; /** Global CPU statistics that are merged into the Root object. */ struct GlobalStats : public statistics::Group @@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject /** * Get the cache line size of the system. */ - inline uint64_t cacheLineSize() const { return _cacheLineSize; } + inline Addr cacheLineSize() const { return _cacheLineSize; } /** * Serialize this object to the given output stream. diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh index f6a796ce82..b65bb70d7b 100644 --- a/src/cpu/minor/fetch1.hh +++ b/src/cpu/minor/fetch1.hh @@ -213,13 +213,13 @@ class Fetch1 : public Named /** Line snap size in bytes. All fetches clip to make their ends not * extend beyond this limit. Setting this to the machine L1 cache line * length will result in fetches never crossing line boundaries. */ - unsigned int lineSnap; + Addr lineSnap; /** Maximum fetch width in bytes. Setting this (and lineSnap) to the * machine L1 cache line length will result in fetches of whole cache * lines. Setting this to sizeof(MachInst) will result it fetches of * single instructions (except near the end of lineSnap lines) */ - unsigned int maxLineWidth; + Addr maxLineWidth; /** Maximum number of fetches allowed in flight (in queues or memory) */ unsigned int fetchLimit; diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh index 4d7c351e7a..e30a615803 100644 --- a/src/cpu/minor/lsq.hh +++ b/src/cpu/minor/lsq.hh @@ -548,7 +548,7 @@ class LSQ : public Named const unsigned int inMemorySystemLimit; /** Memory system access width (and snap) in bytes */ - const unsigned int lineWidth; + const Addr lineWidth; public: /** The LSQ consists of three queues: requests, transfers and the diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh index 6add31444d..2c6da6708a 100644 --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -470,7 +470,7 @@ class Fetch ThreadID retryTid; /** Cache block size. */ - unsigned int cacheBlkSize; + Addr cacheBlkSize; /** The size of the fetch buffer in bytes. The fetch buffer * itself may be smaller than a cache line. diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh index 3fd1674191..1aa170b2db 100644 --- a/src/cpu/testers/memtest/memtest.hh +++ b/src/cpu/testers/memtest/memtest.hh @@ -142,7 +142,7 @@ class MemTest : public ClockedObject // store the expected value for the addresses we have touched std::unordered_map referenceData; - const unsigned blockSize; + const Addr blockSize; const Addr blockAddrMask; diff --git a/src/cpu/trace/trace_cpu.cc b/src/cpu/trace/trace_cpu.cc index d5c6b4fb3c..336a13beda 100644 --- a/src/cpu/trace/trace_cpu.cc +++ b/src/cpu/trace/trace_cpu.cc @@ -585,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr) // stat counting this is useful to keep a check on how frequently this // happens. If required the code could be revised to mimick splitting such // a request into two. - unsigned blk_size = owner.cacheLineSize; + Addr blk_size = owner.cacheLineSize; Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1)); if (!(blk_offset + node_ptr->size <= blk_size)) { node_ptr->size = blk_size - blk_offset; diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh index d54b3c4227..cd32230aca 100644 --- a/src/cpu/trace/trace_cpu.hh +++ b/src/cpu/trace/trace_cpu.hh @@ -286,7 +286,7 @@ class TraceCPU : public ClockedObject }; /** Cache the cache line size that we get from the system */ - const unsigned int cacheLineSize; + const Addr cacheLineSize; /** Port to connect to L1 instruction cache. */ IcachePort icachePort; diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh index 92b44bf5f6..3fd77860f4 100644 --- a/src/dev/dma_device.hh +++ b/src/dev/dma_device.hh @@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable /** Default substreamId */ const uint32_t defaultSSid; - const int cacheLineSize; + const Addr cacheLineSize; protected: @@ -257,7 +257,7 @@ class DmaDevice : public PioDevice void init() override; - unsigned int cacheBlockSize() const { return sys->cacheLineSize(); } + Addr cacheBlockSize() const { return sys->cacheLineSize(); } Port &getPort(const std::string &if_name, PortID idx=InvalidPortID) override; @@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable DmaPort &port; - const int cacheLineSize; + const Addr cacheLineSize; private: class DmaDoneEvent : public Event diff --git a/src/learning_gem5/part2/simple_cache.hh b/src/learning_gem5/part2/simple_cache.hh index 25d195d4f1..1ca87dd126 100644 --- a/src/learning_gem5/part2/simple_cache.hh +++ b/src/learning_gem5/part2/simple_cache.hh @@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject const Cycles latency; /// The block size for the cache - const unsigned blockSize; + const Addr blockSize; /// Number of blocks in the cache (size of cache / block size) const unsigned capacity; diff --git a/src/mem/snoop_filter.hh b/src/mem/snoop_filter.hh index 7d4a222874..23cf77fcdd 100644 --- a/src/mem/snoop_filter.hh +++ b/src/mem/snoop_filter.hh @@ -302,7 +302,7 @@ class SnoopFilter : public SimObject /** Track the mapping from port ids to the local mask ids. */ std::vector localResponsePortIds; /** Cache line size. */ - const unsigned linesize; + const Addr linesize; /** Latency for doing a lookup in the filter */ const Cycles lookupLatency; /** Max capacity in terms of cache blocks tracked, for sanity checking */