cpu: De-templatize O3's LSQUnit.

Change-Id: Id426950b4fec9b98855b3f9f95e63fc0d9b6e64f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42107
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Nathanael Premillieu <nathanael.premillieu@huawei.com>
This commit is contained in:
Gabe Black
2021-03-03 02:38:32 -08:00
parent ea46bfee0f
commit 0f667aff1f
6 changed files with 1814 additions and 1795 deletions

View File

@@ -384,11 +384,11 @@ class BaseO3DynInst : public ExecContext, public RefCounted
/** Load queue index. */
ssize_t lqIdx = -1;
typename ::LSQUnit<O3CPUImpl>::LQIterator lqIt;
typename LSQUnit::LQIterator lqIt;
/** Store queue index. */
ssize_t sqIdx = -1;
typename ::LSQUnit<O3CPUImpl>::SQIterator sqIt;
typename LSQUnit::SQIterator sqIt;
/////////////////////// TLB Miss //////////////////////

View File

@@ -68,7 +68,6 @@ class FullO3CPU;
template <class Impl>
class DefaultIEW;
template <class Impl>
class LSQUnit;
template <class Impl>
@@ -291,7 +290,7 @@ class LSQ
bool isDelayed() { return flags.isSet(Flag::Delayed); }
public:
LSQUnit<Impl>& _port;
LSQUnit& _port;
const O3DynInstPtr _inst;
uint32_t _taskId;
PacketDataPtr _data;
@@ -306,9 +305,9 @@ class LSQ
uint32_t _numOutstandingPackets;
AtomicOpFunctorPtr _amo_op;
protected:
LSQUnit<Impl>* lsqUnit() { return &_port; }
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad);
LSQRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
LSQUnit* lsqUnit() { return &_port; }
LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad);
LSQRequest(LSQUnit* port, const O3DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr);
@@ -668,7 +667,7 @@ class LSQ
using LSQRequest::_numOutstandingPackets;
using LSQRequest::_amo_op;
public:
SingleDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
SingleDataRequest(LSQUnit* port, const O3DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags& flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr, AtomicOpFunctorPtr amo_op=nullptr) :
@@ -706,7 +705,7 @@ class LSQ
using LSQRequest::flags;
using LSQRequest::setState;
public:
HtmCmdRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
HtmCmdRequest(LSQUnit* port, const O3DynInstPtr& inst,
const Request::Flags& flags_);
inline virtual ~HtmCmdRequest() {}
virtual void initiateTranslation();
@@ -753,7 +752,7 @@ class LSQ
PacketPtr _mainPacket;
public:
SplitDataRequest(LSQUnit<Impl>* port, const O3DynInstPtr& inst,
SplitDataRequest(LSQUnit* port, const O3DynInstPtr& inst,
bool isLoad, const Addr& addr, const uint32_t& size,
const Request::Flags & flags_, PacketDataPtr data=nullptr,
uint64_t* res=nullptr) :
@@ -829,14 +828,12 @@ class LSQ
/**
* Commits loads up until the given sequence number for a specific thread.
*/
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{ thread.at(tid).commitLoads(youngest_inst); }
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid);
/**
* Commits stores up until the given sequence number for a specific thread.
*/
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{ thread.at(tid).commitStores(youngest_inst); }
void commitStores(InstSeqNum &youngest_inst, ThreadID tid);
/**
* Attempts to write back stores until all cache ports are used or the
@@ -849,99 +846,55 @@ class LSQ
/**
* Squash instructions from a thread until the specified sequence number.
*/
void
squash(const InstSeqNum &squashed_num, ThreadID tid)
{
thread.at(tid).squash(squashed_num);
}
void squash(const InstSeqNum &squashed_num, ThreadID tid);
/** Returns whether or not there was a memory ordering violation. */
bool violation();
/**
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
bool violation(ThreadID tid) { return thread.at(tid).violation(); }
bool violation(ThreadID tid);
/** Gets the instruction that caused the memory ordering violation. */
O3DynInstPtr
getMemDepViolator(ThreadID tid)
{
return thread.at(tid).getMemDepViolator();
}
O3DynInstPtr getMemDepViolator(ThreadID tid);
/** Returns the head index of the load queue for a specific thread. */
int getLoadHead(ThreadID tid) { return thread.at(tid).getLoadHead(); }
int getLoadHead(ThreadID tid);
/** Returns the sequence number of the head of the load queue. */
InstSeqNum
getLoadHeadSeqNum(ThreadID tid)
{
return thread.at(tid).getLoadHeadSeqNum();
}
InstSeqNum getLoadHeadSeqNum(ThreadID tid);
/** Returns the head index of the store queue. */
int getStoreHead(ThreadID tid) { return thread.at(tid).getStoreHead(); }
int getStoreHead(ThreadID tid);
/** Returns the sequence number of the head of the store queue. */
InstSeqNum
getStoreHeadSeqNum(ThreadID tid)
{
return thread.at(tid).getStoreHeadSeqNum();
}
InstSeqNum getStoreHeadSeqNum(ThreadID tid);
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
int getCount(ThreadID tid) { return thread.at(tid).getCount(); }
int getCount(ThreadID tid);
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
int numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }
int numLoads(ThreadID tid);
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
int numStores(ThreadID tid) { return thread.at(tid).numStores(); }
int numStores(ThreadID tid);
// hardware transactional memory
int numHtmStarts(ThreadID tid) const
{
if (tid == InvalidThreadID)
return 0;
else
return thread[tid].numHtmStarts();
}
int numHtmStops(ThreadID tid) const
{
if (tid == InvalidThreadID)
return 0;
else
return thread[tid].numHtmStops();
}
void resetHtmStartsStops(ThreadID tid)
{
if (tid != InvalidThreadID)
thread[tid].resetHtmStartsStops();
}
uint64_t getLatestHtmUid(ThreadID tid) const
{
if (tid == InvalidThreadID)
return 0;
else
return thread[tid].getLatestHtmUid();
}
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
if (tid != InvalidThreadID)
thread[tid].setLastRetiredHtmUid(htmUid);
}
int numHtmStarts(ThreadID tid) const;
int numHtmStops(ThreadID tid) const;
void resetHtmStartsStops(ThreadID tid);
uint64_t getLatestHtmUid(ThreadID tid) const;
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid);
/** Returns the number of free load entries. */
unsigned numFreeLoadEntries();
@@ -1000,22 +953,22 @@ class LSQ
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
bool hasStoresToWB(ThreadID tid) { return thread.at(tid).hasStoresToWB(); }
bool hasStoresToWB(ThreadID tid);
/** Returns the number of stores a specific thread has to write back. */
int numStoresToWB(ThreadID tid) { return thread.at(tid).numStoresToWB(); }
int numStoresToWB(ThreadID tid);
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
bool willWB(ThreadID tid) { return thread.at(tid).willWB(); }
bool willWB(ThreadID tid);
/** Debugging function to print out all instructions. */
void dumpInsts() const;
/** Debugging function to print out instructions from a specific thread. */
void dumpInsts(ThreadID tid) const { thread.at(tid).dumpInsts(); }
void dumpInsts(ThreadID tid) const;
/** Executes a read operation, using the load specified at the load
* index.
@@ -1122,28 +1075,10 @@ class LSQ
DcachePort dcachePort;
/** The LSQ units for individual threads. */
std::vector<LSQUnit<Impl>> thread;
std::vector<LSQUnit> thread;
/** Number of Threads. */
ThreadID numThreads;
};
/**
 * Executes a read for the given request. The owning thread is derived
 * from the request's context id, and the call is handed to that
 * thread's LSQ unit (bounds-checked lookup via at()).
 */
template <class Impl>
Fault
LSQ<Impl>::read(LSQRequest* req, int load_idx)
{
    const ThreadID owner = cpu->contextToThread(req->request()->contextId());
    auto &unit = thread.at(owner);

    return unit.read(req, load_idx);
}
/**
 * Executes a write for the given request. The owning thread is derived
 * from the request's context id, and the call is handed to that
 * thread's LSQ unit (bounds-checked lookup via at()).
 */
template <class Impl>
Fault
LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx)
{
    const ThreadID owner = cpu->contextToThread(req->request()->contextId());
    auto &unit = thread.at(owner);

    return unit.write(req, data, store_idx);
}
#endif // __CPU_O3_LSQ_HH__

View File

@@ -262,6 +262,20 @@ LSQ<Impl>::executeStore(const O3DynInstPtr &inst)
return thread[tid].executeStore(inst);
}
/**
 * Forwards the commit-loads request to the LSQ unit of thread @p tid.
 * The unit is looked up with at(), so an out-of-range tid throws.
 */
template<class Impl>
void
LSQ<Impl>::commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{
    auto &unit = thread.at(tid);
    unit.commitLoads(youngest_inst);
}
/**
 * Forwards the commit-stores request to the LSQ unit of thread @p tid.
 * The unit is looked up with at(), so an out-of-range tid throws.
 */
template<class Impl>
void
LSQ<Impl>::commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{
    auto &unit = thread.at(tid);
    unit.commitStores(youngest_inst);
}
template<class Impl>
void
LSQ<Impl>::writebackStores()
@@ -281,6 +295,13 @@ LSQ<Impl>::writebackStores()
}
}
/**
 * Forwards a squash request, together with the given sequence number,
 * to the LSQ unit of thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
void
LSQ<Impl>::squash(const InstSeqNum &squashed_num, ThreadID tid)
{
    auto &unit = thread.at(tid);
    unit.squash(squashed_num);
}
template<class Impl>
bool
LSQ<Impl>::violation()
@@ -299,6 +320,98 @@ LSQ<Impl>::violation()
return false;
}
/**
 * Returns the violation() result of the LSQ unit belonging to thread
 * @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
bool
LSQ<Impl>::violation(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.violation();
}
/**
 * Returns whatever getMemDepViolator() yields for the LSQ unit of
 * thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
O3DynInstPtr
LSQ<Impl>::getMemDepViolator(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getMemDepViolator();
}
/**
 * Returns the load-queue head index reported by the LSQ unit of
 * thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::getLoadHead(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getLoadHead();
}
/**
 * Returns the load-head sequence number reported by the LSQ unit of
 * thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
InstSeqNum
LSQ<Impl>::getLoadHeadSeqNum(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getLoadHeadSeqNum();
}
/**
 * Returns the store-queue head index reported by the LSQ unit of
 * thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::getStoreHead(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getStoreHead();
}
/**
 * Returns the store-head sequence number reported by the LSQ unit of
 * thread @p tid (bounds-checked lookup via at()).
 */
template<class Impl>
InstSeqNum
LSQ<Impl>::getStoreHeadSeqNum(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getStoreHeadSeqNum();
}
/**
 * Returns getCount() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::getCount(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.getCount();
}
/**
 * Returns numLoads() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::numLoads(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.numLoads();
}
/**
 * Returns numStores() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::numStores(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.numStores();
}
/**
 * Returns numHtmStarts() of the LSQ unit for thread @p tid, or 0 when
 * @p tid is InvalidThreadID. The unit is indexed with operator[], so
 * no bounds check is performed on any other tid.
 */
template<class Impl>
int
LSQ<Impl>::numHtmStarts(ThreadID tid) const
{
    if (tid != InvalidThreadID)
        return thread[tid].numHtmStarts();

    return 0;
}
/**
 * Returns numHtmStops() of the LSQ unit for thread @p tid, or 0 when
 * @p tid is InvalidThreadID. The unit is indexed with operator[], so
 * no bounds check is performed on any other tid.
 */
template<class Impl>
int
LSQ<Impl>::numHtmStops(ThreadID tid) const
{
    if (tid != InvalidThreadID)
        return thread[tid].numHtmStops();

    return 0;
}
/**
 * Calls resetHtmStartsStops() on the LSQ unit for thread @p tid.
 * Does nothing when @p tid is InvalidThreadID. The unit is indexed
 * with operator[], so no bounds check is performed on any other tid.
 */
template<class Impl>
void
LSQ<Impl>::resetHtmStartsStops(ThreadID tid)
{
    if (tid == InvalidThreadID)
        return;

    thread[tid].resetHtmStartsStops();
}
/**
 * Returns getLatestHtmUid() of the LSQ unit for thread @p tid, or 0
 * when @p tid is InvalidThreadID. The unit is indexed with operator[],
 * so no bounds check is performed on any other tid.
 */
template<class Impl>
uint64_t
LSQ<Impl>::getLatestHtmUid(ThreadID tid) const
{
    if (tid != InvalidThreadID)
        return thread[tid].getLatestHtmUid();

    return 0;
}
/**
 * Passes @p htmUid to setLastRetiredHtmUid() on the LSQ unit for
 * thread @p tid. Does nothing when @p tid is InvalidThreadID. The unit
 * is indexed with operator[], so no bounds check is performed on any
 * other tid.
 */
template<class Impl>
void
LSQ<Impl>::setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
    if (tid == InvalidThreadID)
        return;

    thread[tid].setLastRetiredHtmUid(htmUid);
}
template <class Impl>
void
LSQ<Impl>::recvReqRetry()
@@ -651,6 +764,20 @@ LSQ<Impl>::hasStoresToWB()
return false;
}
/**
 * Returns hasStoresToWB() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
bool
LSQ<Impl>::hasStoresToWB(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.hasStoresToWB();
}
/**
 * Returns numStoresToWB() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
int
LSQ<Impl>::numStoresToWB(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.numStoresToWB();
}
template<class Impl>
bool
LSQ<Impl>::willWB()
@@ -668,6 +795,13 @@ LSQ<Impl>::willWB()
return false;
}
/**
 * Returns willWB() of the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
bool
LSQ<Impl>::willWB(ThreadID tid)
{
    auto &unit = thread.at(tid);
    return unit.willWB();
}
template<class Impl>
void
LSQ<Impl>::dumpInsts() const
@@ -682,6 +816,13 @@ LSQ<Impl>::dumpInsts() const
}
}
/**
 * Calls dumpInsts() on the LSQ unit belonging to thread @p tid
 * (bounds-checked lookup via at()).
 */
template<class Impl>
void
LSQ<Impl>::dumpInsts(ThreadID tid) const
{
    const auto &unit = thread.at(tid);
    unit.dumpInsts();
}
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const O3DynInstPtr& inst, bool isLoad, uint8_t *data,
@@ -961,7 +1102,7 @@ LSQ<Impl>::SplitDataRequest::initiateTranslation()
template<class Impl>
LSQ<Impl>::LSQRequest::LSQRequest(
LSQUnit<Impl> *port, const O3DynInstPtr& inst, bool isLoad) :
LSQUnit *port, const O3DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
@@ -976,7 +1117,7 @@ LSQ<Impl>::LSQRequest::LSQRequest(
template<class Impl>
LSQ<Impl>::LSQRequest::LSQRequest(
LSQUnit<Impl>* port, const O3DynInstPtr& inst, bool isLoad,
LSQUnit *port, const O3DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size, const Request::Flags& flags_,
PacketDataPtr data, uint64_t* res, AtomicOpFunctorPtr amo_op)
: _state(State::NotIssued), _senderState(nullptr),
@@ -1312,7 +1453,7 @@ LSQ<Impl>::DcachePort::recvReqRetry()
}
template<class Impl>
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit<Impl>* port,
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
const O3DynInstPtr& inst,
const Request::Flags& flags_) :
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
@@ -1366,4 +1507,22 @@ LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
panic("unexpected behaviour");
}
/**
 * Executes a read for the given request. The thread is resolved from
 * the request's context id via the CPU, and the call is delegated to
 * that thread's LSQ unit (bounds-checked lookup via at()).
 */
template <class Impl>
Fault
LSQ<Impl>::read(LSQRequest* req, int load_idx)
{
    const ThreadID owner = cpu->contextToThread(req->request()->contextId());
    auto &unit = thread.at(owner);

    return unit.read(req, load_idx);
}
/**
 * Executes a write for the given request. The thread is resolved from
 * the request's context id via the CPU, and the call is delegated to
 * that thread's LSQ unit (bounds-checked lookup via at()).
 */
template <class Impl>
Fault
LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx)
{
    const ThreadID owner = cpu->contextToThread(req->request()->contextId());
    auto &unit = thread.at(owner);

    return unit.write(req, data, store_idx);
}
#endif//__CPU_O3_LSQ_IMPL_HH__

File diff suppressed because it is too large Load Diff

View File

@@ -52,9 +52,12 @@
#include "arch/generic/vec_reg.hh"
#include "arch/locked_mem.hh"
#include "config/the_isa.hh"
#include "cpu/base.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/dyn_inst_ptr.hh"
#include "cpu/o3/impl.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/timebuf.hh"
#include "debug/HtmCpu.hh"
@@ -80,14 +83,13 @@ class DefaultIEW;
* the LSQ until the store writes back. At that point the load is
* replayed.
*/
template <class Impl>
class LSQUnit
{
public:
static constexpr auto MaxDataBytes = MaxVecRegLenInBytes;
using LSQSenderState = typename LSQ<Impl>::LSQSenderState;
using LSQRequest = typename LSQ<Impl>::LSQRequest;
using LSQSenderState = typename LSQ<O3CPUImpl>::LSQSenderState;
using LSQRequest = typename LSQ<O3CPUImpl>::LSQRequest;
private:
class LSQEntry
{
@@ -123,10 +125,10 @@ class LSQUnit
}
void
set(const O3DynInstPtr& inst)
set(const O3DynInstPtr& new_inst)
{
assert(!_valid);
this->inst = inst;
inst = new_inst;
_valid = true;
_size = 0;
}
@@ -223,8 +225,9 @@ class LSQUnit
}
/** Initializes the LSQ unit with the specified number of entries. */
void init(FullO3CPU<Impl> *cpu_ptr, DefaultIEW<Impl> *iew_ptr,
const DerivO3CPUParams &params, LSQ<Impl> *lsq_ptr, unsigned id);
void init(FullO3CPU<O3CPUImpl> *cpu_ptr, DefaultIEW<O3CPUImpl> *iew_ptr,
const DerivO3CPUParams &params, LSQ<O3CPUImpl> *lsq_ptr,
unsigned id);
/** Returns the name of the LSQ unit. */
std::string name() const;
@@ -396,13 +399,13 @@ class LSQUnit
private:
/** Pointer to the CPU. */
FullO3CPU<Impl> *cpu;
FullO3CPU<O3CPUImpl> *cpu;
/** Pointer to the IEW stage. */
DefaultIEW<Impl> *iewStage;
DefaultIEW<O3CPUImpl> *iewStage;
/** Pointer to the LSQ. */
LSQ<Impl> *lsq;
LSQ<O3CPUImpl> *lsq;
/** Pointer to the dcache port. Used only for sending. */
RequestPort *dcachePort;
@@ -466,7 +469,7 @@ class LSQUnit
PacketPtr pkt;
/** The pointer to the LSQ unit that issued the store. */
LSQUnit<Impl> *lsqPtr;
LSQUnit *lsqPtr;
};
public:
@@ -543,13 +546,6 @@ class LSQUnit
/** The oldest load that caused a memory ordering violation. */
O3DynInstPtr memDepViolator;
/** Whether or not there is a packet that couldn't be sent because of
* a lack of cache ports. */
bool hasPendingRequest;
/** The packet that is pending free cache ports. */
LSQRequest* pendingRequest;
/** Flag for memory model. */
bool needsTSO;
@@ -595,24 +591,12 @@ class LSQUnit
int getLoadHead() { return loadQueue.head(); }
/** Returns the sequence number of the head load instruction. */
InstSeqNum
getLoadHeadSeqNum()
{
return loadQueue.front().valid()
? loadQueue.front().instruction()->seqNum
: 0;
}
InstSeqNum getLoadHeadSeqNum();
/** Returns the index of the head store instruction. */
int getStoreHead() { return storeQueue.head(); }
/** Returns the sequence number of the head store instruction. */
InstSeqNum
getStoreHeadSeqNum()
{
return storeQueue.front().valid()
? storeQueue.front().instruction()->seqNum
: 0;
}
InstSeqNum getStoreHeadSeqNum();
/** Returns whether or not the LSQ unit is stalled. */
bool isStalled() { return stalled; }

File diff suppressed because it is too large Load Diff