cpu: Explicitly define cache_line_size -> 64-bit unsigned int (#329)

While it was plausible to define cache_line_size as a 32-bit unsigned
int originally, the use of cache_line_size has since grown well beyond that scope.

cache_line_size has been used to produce an address mask, which masks
out the offset bits of an address. For example, see [1], [2], [3], and
[4]. However, since cache_line_size is an "unsigned int", the type
of the value is not guaranteed to be 64 bits wide. Consequently, the bit
twiddling hacks in [1], [2], [3], and [4] produce a 32-bit mask, i.e.,
0x00000000FFFFFFC0.

This behavior caused at least one problem, in the LLSC implementation for
RISC-V [5], where the load reservation (LR) relies on the mask to produce
the cache block address. With the mask above, two distinct 64-bit
addresses can be mapped to the same cache block.

This patch explicitly defines cache_line_size as a 64-bit unsigned int
so the cache block mask can be produced correctly for 64-bit addresses.

[1]
3bdcfd6f7a/src/cpu/simple/atomic.hh (L147)
[2]
3bdcfd6f7a/src/cpu/simple/timing.hh (L224)
[3]
3bdcfd6f7a/src/cpu/o3/lsq_unit.cc (L241)
[4]
3bdcfd6f7a/src/cpu/minor/lsq.cc (L1425)
[5]
3bdcfd6f7a/src/arch/riscv/isa.cc (L787)
This commit is contained in:
Bobby R. Bruce
2023-10-16 07:50:35 -07:00
committed by GitHub
14 changed files with 26 additions and 26 deletions

View File

@@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
AddressMonitor &monitor = addressMonitor[tid];
if (!monitor.gotWakeup) {
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
Addr block_size = cacheLineSize();
Addr mask = ~(block_size - 1);
assert(pkt->req->hasPaddr());
monitor.pAddr = pkt->getAddr() & mask;
@@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu)
RequestPtr req = std::make_shared<Request>();
Addr addr = monitor.vAddr;
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
Addr block_size = cacheLineSize();
Addr mask = ~(block_size - 1);
int size = block_size;
//The address of the next line if it crosses a cache line boundary.

View File

@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
bool _switchedOut;
/** Cache the cache line size that we get from the system */
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
/** Global CPU statistics that are merged into the Root object. */
struct GlobalStats : public statistics::Group
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
/**
* Get the cache line size of the system.
*/
inline unsigned int cacheLineSize() const { return _cacheLineSize; }
inline Addr cacheLineSize() const { return _cacheLineSize; }
/**
* Serialize this object to the given output stream.

View File

@@ -213,13 +213,13 @@ class Fetch1 : public Named
/** Line snap size in bytes. All fetches clip to make their ends not
* extend beyond this limit. Setting this to the machine L1 cache line
* length will result in fetches never crossing line boundaries. */
unsigned int lineSnap;
Addr lineSnap;
/** Maximum fetch width in bytes. Setting this (and lineSnap) to the
* machine L1 cache line length will result in fetches of whole cache
* lines. Setting this to sizeof(MachInst) will result it fetches of
* single instructions (except near the end of lineSnap lines) */
unsigned int maxLineWidth;
Addr maxLineWidth;
/** Maximum number of fetches allowed in flight (in queues or memory) */
unsigned int fetchLimit;

View File

@@ -548,7 +548,7 @@ class LSQ : public Named
const unsigned int inMemorySystemLimit;
/** Memory system access width (and snap) in bytes */
const unsigned int lineWidth;
const Addr lineWidth;
public:
/** The LSQ consists of three queues: requests, transfers and the

View File

@@ -470,7 +470,7 @@ class Fetch
ThreadID retryTid;
/** Cache block size. */
unsigned int cacheBlkSize;
Addr cacheBlkSize;
/** The size of the fetch buffer in bytes. The fetch buffer
* itself may be smaller than a cache line.

View File

@@ -144,7 +144,7 @@ class MemTest : public ClockedObject
// store the expected value for the addresses we have touched
std::unordered_map<Addr, uint8_t> referenceData;
const unsigned blockSize;
const Addr blockSize;
const Addr blockAddrMask;

View File

@@ -585,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
// stat counting this is useful to keep a check on how frequently this
// happens. If required the code could be revised to mimick splitting such
// a request into two.
unsigned blk_size = owner.cacheLineSize;
Addr blk_size = owner.cacheLineSize;
Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
if (!(blk_offset + node_ptr->size <= blk_size)) {
node_ptr->size = blk_size - blk_offset;

View File

@@ -286,7 +286,7 @@ class TraceCPU : public ClockedObject
};
/** Cache the cache line size that we get from the system */
const unsigned int cacheLineSize;
const Addr cacheLineSize;
/** Port to connect to L1 instruction cache. */
IcachePort icachePort;

View File

@@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable
/** Default substreamId */
const uint32_t defaultSSid;
const int cacheLineSize;
const Addr cacheLineSize;
protected:
@@ -257,7 +257,7 @@ class DmaDevice : public PioDevice
void init() override;
unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
Addr cacheBlockSize() const { return sys->cacheLineSize(); }
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
@@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable
DmaPort &port;
const int cacheLineSize;
const Addr cacheLineSize;
private:
class DmaDoneEvent : public Event

View File

@@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject
const Cycles latency;
/// The block size for the cache
const unsigned blockSize;
const Addr blockSize;
/// Number of blocks in the cache (size of cache / block size)
const unsigned capacity;

View File

@@ -44,12 +44,12 @@
namespace gem5
{
PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) :
PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) :
PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); },
cache_line_size)
{}
PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) :
PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) :
PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); },
cache_line_size)
{}

View File

@@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol
SendFunctionalFunc sendFunctional;
/** Granularity of any transactions issued through this proxy. */
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
void
recvFunctionalSnoop(PacketPtr pkt) override
@@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol
}
public:
PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) :
PortProxy(SendFunctionalFunc func, Addr cache_line_size) :
sendFunctional(func), _cacheLineSize(cache_line_size)
{}
// Helpers which create typical SendFunctionalFunc-s from other objects.
PortProxy(ThreadContext *tc, unsigned int cache_line_size);
PortProxy(const RequestPort &port, unsigned int cache_line_size);
PortProxy(ThreadContext *tc, Addr cache_line_size);
PortProxy(const RequestPort &port, Addr cache_line_size);
virtual ~PortProxy() {}

View File

@@ -302,7 +302,7 @@ class SnoopFilter : public SimObject
/** Track the mapping from port ids to the local mask ids. */
std::vector<PortID> localResponsePortIds;
/** Cache line size. */
const unsigned linesize;
const Addr linesize;
/** Latency for doing a lookup in the filter */
const Cycles lookupLatency;
/** Max capacity in terms of cache blocks tracked, for sanity checking */

View File

@@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope
/**
* Get the cache line size of the system.
*/
unsigned int cacheLineSize() const { return _cacheLineSize; }
Addr cacheLineSize() const { return _cacheLineSize; }
Threads threads;
@@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope
enums::MemoryMode memoryMode;
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
uint64_t workItemsBegin = 0;
uint64_t workItemsEnd = 0;