cpu: Explicitly define cache_line_size -> 64-bit unsigned int (#329)

While it was plausible to define cache_line_size as a 32-bit unsigned
int originally, the use of cache_line_size has since grown well beyond that scope.

cache_line_size has been used to produce an address mask, which masks
out the offset bits of an address. For example, see [1], [2], [3], and
[4]. However, since cache_line_size is an "unsigned int", the type
of the value is not guaranteed to be 64 bits wide. Consequently, the bit
twiddling hacks in [1], [2], [3], and [4] produce a 32-bit mask, i.e.,
0x00000000FFFFFFC0.

This behavior caused at least one problem, in the LLSC implementation for
RISC-V [5], where the load reservation (LR) relies on the mask to produce
the cache block address. With the mask above, two distinct 64-bit
addresses can be mapped to the same cache block.

This patch explicitly defines cache_line_size as a 64-bit unsigned int
so the cache block mask can be produced correctly for 64-bit addresses.

[1]
3bdcfd6f7a/src/cpu/simple/atomic.hh (L147)
[2]
3bdcfd6f7a/src/cpu/simple/timing.hh (L224)
[3]
3bdcfd6f7a/src/cpu/o3/lsq_unit.cc (L241)
[4]
3bdcfd6f7a/src/cpu/minor/lsq.cc (L1425)
[5]
3bdcfd6f7a/src/arch/riscv/isa.cc (L787)
This commit is contained in:
Bobby R. Bruce
2023-10-16 07:50:35 -07:00
committed by GitHub
14 changed files with 26 additions and 26 deletions

View File

@@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
AddressMonitor &monitor = addressMonitor[tid];
if (!monitor.gotWakeup) {
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
Addr block_size = cacheLineSize();
Addr mask = ~(block_size - 1);
assert(pkt->req->hasPaddr());
monitor.pAddr = pkt->getAddr() & mask;
@@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu)
RequestPtr req = std::make_shared<Request>();
Addr addr = monitor.vAddr;
int block_size = cacheLineSize();
uint64_t mask = ~((uint64_t)(block_size - 1));
Addr block_size = cacheLineSize();
Addr mask = ~(block_size - 1);
int size = block_size;
//The address of the next line if it crosses a cache line boundary.

View File

@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
bool _switchedOut;
/** Cache the cache line size that we get from the system */
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
/** Global CPU statistics that are merged into the Root object. */
struct GlobalStats : public statistics::Group
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
/**
* Get the cache line size of the system.
*/
inline unsigned int cacheLineSize() const { return _cacheLineSize; }
inline Addr cacheLineSize() const { return _cacheLineSize; }
/**
* Serialize this object to the given output stream.

View File

@@ -213,13 +213,13 @@ class Fetch1 : public Named
/** Line snap size in bytes. All fetches clip to make their ends not
* extend beyond this limit. Setting this to the machine L1 cache line
* length will result in fetches never crossing line boundaries. */
unsigned int lineSnap;
Addr lineSnap;
/** Maximum fetch width in bytes. Setting this (and lineSnap) to the
* machine L1 cache line length will result in fetches of whole cache
* lines. Setting this to sizeof(MachInst) will result it fetches of
* single instructions (except near the end of lineSnap lines) */
unsigned int maxLineWidth;
Addr maxLineWidth;
/** Maximum number of fetches allowed in flight (in queues or memory) */
unsigned int fetchLimit;

View File

@@ -548,7 +548,7 @@ class LSQ : public Named
const unsigned int inMemorySystemLimit;
/** Memory system access width (and snap) in bytes */
const unsigned int lineWidth;
const Addr lineWidth;
public:
/** The LSQ consists of three queues: requests, transfers and the

View File

@@ -470,7 +470,7 @@ class Fetch
ThreadID retryTid;
/** Cache block size. */
unsigned int cacheBlkSize;
Addr cacheBlkSize;
/** The size of the fetch buffer in bytes. The fetch buffer
* itself may be smaller than a cache line.

View File

@@ -144,7 +144,7 @@ class MemTest : public ClockedObject
// store the expected value for the addresses we have touched
std::unordered_map<Addr, uint8_t> referenceData;
const unsigned blockSize;
const Addr blockSize;
const Addr blockAddrMask;

View File

@@ -585,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
// stat counting this is useful to keep a check on how frequently this
// happens. If required the code could be revised to mimick splitting such
// a request into two.
unsigned blk_size = owner.cacheLineSize;
Addr blk_size = owner.cacheLineSize;
Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
if (!(blk_offset + node_ptr->size <= blk_size)) {
node_ptr->size = blk_size - blk_offset;

View File

@@ -286,7 +286,7 @@ class TraceCPU : public ClockedObject
};
/** Cache the cache line size that we get from the system */
const unsigned int cacheLineSize;
const Addr cacheLineSize;
/** Port to connect to L1 instruction cache. */
IcachePort icachePort;

View File

@@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable
/** Default substreamId */
const uint32_t defaultSSid;
const int cacheLineSize;
const Addr cacheLineSize;
protected:
@@ -257,7 +257,7 @@ class DmaDevice : public PioDevice
void init() override;
unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
Addr cacheBlockSize() const { return sys->cacheLineSize(); }
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
@@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable
DmaPort &port;
const int cacheLineSize;
const Addr cacheLineSize;
private:
class DmaDoneEvent : public Event

View File

@@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject
const Cycles latency;
/// The block size for the cache
const unsigned blockSize;
const Addr blockSize;
/// Number of blocks in the cache (size of cache / block size)
const unsigned capacity;

View File

@@ -44,12 +44,12 @@
namespace gem5
{
PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) :
PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) :
PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); },
cache_line_size)
{}
PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) :
PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) :
PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); },
cache_line_size)
{}

View File

@@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol
SendFunctionalFunc sendFunctional;
/** Granularity of any transactions issued through this proxy. */
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
void
recvFunctionalSnoop(PacketPtr pkt) override
@@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol
}
public:
PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) :
PortProxy(SendFunctionalFunc func, Addr cache_line_size) :
sendFunctional(func), _cacheLineSize(cache_line_size)
{}
// Helpers which create typical SendFunctionalFunc-s from other objects.
PortProxy(ThreadContext *tc, unsigned int cache_line_size);
PortProxy(const RequestPort &port, unsigned int cache_line_size);
PortProxy(ThreadContext *tc, Addr cache_line_size);
PortProxy(const RequestPort &port, Addr cache_line_size);
virtual ~PortProxy() {}

View File

@@ -302,7 +302,7 @@ class SnoopFilter : public SimObject
/** Track the mapping from port ids to the local mask ids. */
std::vector<PortID> localResponsePortIds;
/** Cache line size. */
const unsigned linesize;
const Addr linesize;
/** Latency for doing a lookup in the filter */
const Cycles lookupLatency;
/** Max capacity in terms of cache blocks tracked, for sanity checking */

View File

@@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope
/**
* Get the cache line size of the system.
*/
unsigned int cacheLineSize() const { return _cacheLineSize; }
Addr cacheLineSize() const { return _cacheLineSize; }
Threads threads;
@@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope
enums::MemoryMode memoryMode;
const unsigned int _cacheLineSize;
const Addr _cacheLineSize;
uint64_t workItemsBegin = 0;
uint64_t workItemsEnd = 0;