cpu: Explicitly define cache_line_size -> 64-bit unsigned int (#329)
While it is plausible to define cache_line_size as a 32-bit unsigned int, the use of cache_line_size is now well outside its original scope. cache_line_size has been used to produce an address mask that masks out the offset bits of an address — for example, in [1], [2], [3], and [4]. However, since cache_line_size is an "unsigned int", its value is not guaranteed to be 64 bits wide. Consequently, the bit-twiddling hacks in [1], [2], [3], and [4] produce a 32-bit mask, i.e., 0x00000000FFFFFFC0. This behavior caused at least one problem, in the RISC-V LLSC implementation [5], where the load reservation (LR) relies on the mask to produce the cache block address: with the 32-bit mask, two distinct 64-bit addresses can be mapped to the same cache block. This patch explicitly defines cache_line_size as a 64-bit unsigned int so that the cache block mask is produced correctly for 64-bit addresses. [1] 3bdcfd6f7a/src/cpu/simple/atomic.hh (L147) [2] 3bdcfd6f7a/src/cpu/simple/timing.hh (L224) [3] 3bdcfd6f7a/src/cpu/o3/lsq_unit.cc (L241) [4] 3bdcfd6f7a/src/cpu/minor/lsq.cc (L1425) [5] 3bdcfd6f7a/src/arch/riscv/isa.cc (L787)
This commit is contained in:
@@ -257,8 +257,8 @@ BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
|
||||
AddressMonitor &monitor = addressMonitor[tid];
|
||||
|
||||
if (!monitor.gotWakeup) {
|
||||
int block_size = cacheLineSize();
|
||||
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||
Addr block_size = cacheLineSize();
|
||||
Addr mask = ~(block_size - 1);
|
||||
|
||||
assert(pkt->req->hasPaddr());
|
||||
monitor.pAddr = pkt->getAddr() & mask;
|
||||
@@ -282,8 +282,8 @@ BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, BaseMMU *mmu)
|
||||
RequestPtr req = std::make_shared<Request>();
|
||||
|
||||
Addr addr = monitor.vAddr;
|
||||
int block_size = cacheLineSize();
|
||||
uint64_t mask = ~((uint64_t)(block_size - 1));
|
||||
Addr block_size = cacheLineSize();
|
||||
Addr mask = ~(block_size - 1);
|
||||
int size = block_size;
|
||||
|
||||
//The address of the next line if it crosses a cache line boundary.
|
||||
|
||||
@@ -143,7 +143,7 @@ class BaseCPU : public ClockedObject
|
||||
bool _switchedOut;
|
||||
|
||||
/** Cache the cache line size that we get from the system */
|
||||
const unsigned int _cacheLineSize;
|
||||
const Addr _cacheLineSize;
|
||||
|
||||
/** Global CPU statistics that are merged into the Root object. */
|
||||
struct GlobalStats : public statistics::Group
|
||||
@@ -394,7 +394,7 @@ class BaseCPU : public ClockedObject
|
||||
/**
|
||||
* Get the cache line size of the system.
|
||||
*/
|
||||
inline unsigned int cacheLineSize() const { return _cacheLineSize; }
|
||||
inline Addr cacheLineSize() const { return _cacheLineSize; }
|
||||
|
||||
/**
|
||||
* Serialize this object to the given output stream.
|
||||
|
||||
@@ -213,13 +213,13 @@ class Fetch1 : public Named
|
||||
/** Line snap size in bytes. All fetches clip to make their ends not
|
||||
* extend beyond this limit. Setting this to the machine L1 cache line
|
||||
* length will result in fetches never crossing line boundaries. */
|
||||
unsigned int lineSnap;
|
||||
Addr lineSnap;
|
||||
|
||||
/** Maximum fetch width in bytes. Setting this (and lineSnap) to the
|
||||
* machine L1 cache line length will result in fetches of whole cache
|
||||
* lines. Setting this to sizeof(MachInst) will result it fetches of
|
||||
* single instructions (except near the end of lineSnap lines) */
|
||||
unsigned int maxLineWidth;
|
||||
Addr maxLineWidth;
|
||||
|
||||
/** Maximum number of fetches allowed in flight (in queues or memory) */
|
||||
unsigned int fetchLimit;
|
||||
|
||||
@@ -548,7 +548,7 @@ class LSQ : public Named
|
||||
const unsigned int inMemorySystemLimit;
|
||||
|
||||
/** Memory system access width (and snap) in bytes */
|
||||
const unsigned int lineWidth;
|
||||
const Addr lineWidth;
|
||||
|
||||
public:
|
||||
/** The LSQ consists of three queues: requests, transfers and the
|
||||
|
||||
@@ -470,7 +470,7 @@ class Fetch
|
||||
ThreadID retryTid;
|
||||
|
||||
/** Cache block size. */
|
||||
unsigned int cacheBlkSize;
|
||||
Addr cacheBlkSize;
|
||||
|
||||
/** The size of the fetch buffer in bytes. The fetch buffer
|
||||
* itself may be smaller than a cache line.
|
||||
|
||||
@@ -144,7 +144,7 @@ class MemTest : public ClockedObject
|
||||
// store the expected value for the addresses we have touched
|
||||
std::unordered_map<Addr, uint8_t> referenceData;
|
||||
|
||||
const unsigned blockSize;
|
||||
const Addr blockSize;
|
||||
|
||||
const Addr blockAddrMask;
|
||||
|
||||
|
||||
@@ -585,7 +585,7 @@ TraceCPU::ElasticDataGen::executeMemReq(GraphNode* node_ptr)
|
||||
// stat counting this is useful to keep a check on how frequently this
|
||||
// happens. If required the code could be revised to mimick splitting such
|
||||
// a request into two.
|
||||
unsigned blk_size = owner.cacheLineSize;
|
||||
Addr blk_size = owner.cacheLineSize;
|
||||
Addr blk_offset = (node_ptr->physAddr & (Addr)(blk_size - 1));
|
||||
if (!(blk_offset + node_ptr->size <= blk_size)) {
|
||||
node_ptr->size = blk_size - blk_offset;
|
||||
|
||||
@@ -286,7 +286,7 @@ class TraceCPU : public ClockedObject
|
||||
};
|
||||
|
||||
/** Cache the cache line size that we get from the system */
|
||||
const unsigned int cacheLineSize;
|
||||
const Addr cacheLineSize;
|
||||
|
||||
/** Port to connect to L1 instruction cache. */
|
||||
IcachePort icachePort;
|
||||
|
||||
@@ -187,7 +187,7 @@ class DmaPort : public RequestPort, public Drainable
|
||||
/** Default substreamId */
|
||||
const uint32_t defaultSSid;
|
||||
|
||||
const int cacheLineSize;
|
||||
const Addr cacheLineSize;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -257,7 +257,7 @@ class DmaDevice : public PioDevice
|
||||
|
||||
void init() override;
|
||||
|
||||
unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }
|
||||
Addr cacheBlockSize() const { return sys->cacheLineSize(); }
|
||||
|
||||
Port &getPort(const std::string &if_name,
|
||||
PortID idx=InvalidPortID) override;
|
||||
@@ -526,7 +526,7 @@ class DmaReadFifo : public Drainable, public Serializable
|
||||
|
||||
DmaPort &port;
|
||||
|
||||
const int cacheLineSize;
|
||||
const Addr cacheLineSize;
|
||||
|
||||
private:
|
||||
class DmaDoneEvent : public Event
|
||||
|
||||
@@ -267,7 +267,7 @@ class SimpleCache : public ClockedObject
|
||||
const Cycles latency;
|
||||
|
||||
/// The block size for the cache
|
||||
const unsigned blockSize;
|
||||
const Addr blockSize;
|
||||
|
||||
/// Number of blocks in the cache (size of cache / block size)
|
||||
const unsigned capacity;
|
||||
|
||||
@@ -44,12 +44,12 @@
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
PortProxy::PortProxy(ThreadContext *tc, unsigned int cache_line_size) :
|
||||
PortProxy::PortProxy(ThreadContext *tc, Addr cache_line_size) :
|
||||
PortProxy([tc](PacketPtr pkt)->void { tc->sendFunctional(pkt); },
|
||||
cache_line_size)
|
||||
{}
|
||||
|
||||
PortProxy::PortProxy(const RequestPort &port, unsigned int cache_line_size) :
|
||||
PortProxy::PortProxy(const RequestPort &port, Addr cache_line_size) :
|
||||
PortProxy([&port](PacketPtr pkt)->void { port.sendFunctional(pkt); },
|
||||
cache_line_size)
|
||||
{}
|
||||
|
||||
@@ -92,7 +92,7 @@ class PortProxy : FunctionalRequestProtocol
|
||||
SendFunctionalFunc sendFunctional;
|
||||
|
||||
/** Granularity of any transactions issued through this proxy. */
|
||||
const unsigned int _cacheLineSize;
|
||||
const Addr _cacheLineSize;
|
||||
|
||||
void
|
||||
recvFunctionalSnoop(PacketPtr pkt) override
|
||||
@@ -103,13 +103,13 @@ class PortProxy : FunctionalRequestProtocol
|
||||
}
|
||||
|
||||
public:
|
||||
PortProxy(SendFunctionalFunc func, unsigned int cache_line_size) :
|
||||
PortProxy(SendFunctionalFunc func, Addr cache_line_size) :
|
||||
sendFunctional(func), _cacheLineSize(cache_line_size)
|
||||
{}
|
||||
|
||||
// Helpers which create typical SendFunctionalFunc-s from other objects.
|
||||
PortProxy(ThreadContext *tc, unsigned int cache_line_size);
|
||||
PortProxy(const RequestPort &port, unsigned int cache_line_size);
|
||||
PortProxy(ThreadContext *tc, Addr cache_line_size);
|
||||
PortProxy(const RequestPort &port, Addr cache_line_size);
|
||||
|
||||
virtual ~PortProxy() {}
|
||||
|
||||
|
||||
@@ -302,7 +302,7 @@ class SnoopFilter : public SimObject
|
||||
/** Track the mapping from port ids to the local mask ids. */
|
||||
std::vector<PortID> localResponsePortIds;
|
||||
/** Cache line size. */
|
||||
const unsigned linesize;
|
||||
const Addr linesize;
|
||||
/** Latency for doing a lookup in the filter */
|
||||
const Cycles lookupLatency;
|
||||
/** Max capacity in terms of cache blocks tracked, for sanity checking */
|
||||
|
||||
@@ -305,7 +305,7 @@ class System : public SimObject, public PCEventScope
|
||||
/**
|
||||
* Get the cache line size of the system.
|
||||
*/
|
||||
unsigned int cacheLineSize() const { return _cacheLineSize; }
|
||||
Addr cacheLineSize() const { return _cacheLineSize; }
|
||||
|
||||
Threads threads;
|
||||
|
||||
@@ -405,7 +405,7 @@ class System : public SimObject, public PCEventScope
|
||||
|
||||
enums::MemoryMode memoryMode;
|
||||
|
||||
const unsigned int _cacheLineSize;
|
||||
const Addr _cacheLineSize;
|
||||
|
||||
uint64_t workItemsBegin = 0;
|
||||
uint64_t workItemsEnd = 0;
|
||||
|
||||
Reference in New Issue
Block a user