cpu: HTM Implementation for O3CPU
JIRA: https://gem5.atlassian.net/browse/GEM5-587 Change-Id: I83787f4594963a15d856b81ad283b4f032d1c007 Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/30328 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Giacomo Travaglini
parent
79df434187
commit
46d7fdf1b6
@@ -61,6 +61,7 @@
|
||||
#include "cpu/op_class.hh"
|
||||
#include "cpu/static_inst.hh"
|
||||
#include "cpu/translation.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "sim/byteswap.hh"
|
||||
@@ -140,6 +141,7 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
IsStrictlyOrdered,
|
||||
ReqMade,
|
||||
MemOpDone,
|
||||
HtmFromTransaction,
|
||||
MaxFlags
|
||||
};
|
||||
|
||||
@@ -240,6 +242,11 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
// Need a copy of main request pointer to verify on writes.
|
||||
RequestPtr reqToVerify;
|
||||
|
||||
private:
|
||||
// hardware transactional memory
|
||||
uint64_t htmUid;
|
||||
uint64_t htmDepth;
|
||||
|
||||
protected:
|
||||
/** Flattened register index of the destination registers of this
|
||||
* instruction.
|
||||
@@ -548,8 +555,8 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
|
||||
uint64_t getHtmTransactionUid() const override
|
||||
{
|
||||
panic("Not yet implemented\n");
|
||||
return 0;
|
||||
assert(instFlags[HtmFromTransaction]);
|
||||
return this->htmUid;
|
||||
}
|
||||
|
||||
uint64_t newHtmTransactionUid() const override
|
||||
@@ -560,14 +567,35 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
|
||||
bool inHtmTransactionalState() const override
|
||||
{
|
||||
panic("Not yet implemented\n");
|
||||
return false;
|
||||
return instFlags[HtmFromTransaction];
|
||||
}
|
||||
|
||||
uint64_t getHtmTransactionalDepth() const override
|
||||
{
|
||||
panic("Not yet implemented\n");
|
||||
return 0;
|
||||
if (inHtmTransactionalState())
|
||||
return this->htmDepth;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setHtmTransactionalState(uint64_t htm_uid, uint64_t htm_depth)
|
||||
{
|
||||
instFlags.set(HtmFromTransaction);
|
||||
htmUid = htm_uid;
|
||||
htmDepth = htm_depth;
|
||||
}
|
||||
|
||||
void clearHtmTransactionalState()
|
||||
{
|
||||
if (inHtmTransactionalState()) {
|
||||
DPRINTF(HtmCpu,
|
||||
"clearing instuction's transactional state htmUid=%u\n",
|
||||
getHtmTransactionUid());
|
||||
|
||||
instFlags.reset(HtmFromTransaction);
|
||||
htmUid = -1;
|
||||
htmDepth = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/** Temporarily sets this instruction as a serialize before instruction. */
|
||||
@@ -997,8 +1025,9 @@ template<class Impl>
|
||||
/**
 * Issues a hardware transactional memory command through the CPU's
 * request path.
 *
 * The command is pushed as a load (second argument true) of size 8 at
 * address 0x0 with no data, result or byte-enable buffers; only the
 * request flags distinguish it.
 *
 * @param flags request flags describing the HTM command
 * @return the fault returned by pushRequest()
 */
Fault
BaseDynInst<Impl>::initiateHtmCmd(Request::Flags flags)
{
    return cpu->pushRequest(
            dynamic_cast<typename DynInstPtr::PtrType>(this),
            /* ld */ true, nullptr, 8, 0x0ul, flags, nullptr, nullptr);
}
|
||||
|
||||
template<class Impl>
|
||||
|
||||
@@ -95,6 +95,9 @@ BaseDynInst<Impl>::initVars()
|
||||
physEffAddr = 0;
|
||||
readyRegs = 0;
|
||||
memReqFlags = 0;
|
||||
// hardware transactional memory
|
||||
htmUid = -1;
|
||||
htmDepth = 0;
|
||||
|
||||
status.reset();
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2012, 2014 ARM Limited
|
||||
* Copyright (c) 2010-2012, 2014, 2019 ARM Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -205,6 +205,12 @@ class DefaultCommit
|
||||
/** Deschedules a thread from scheduling */
|
||||
void deactivateThread(ThreadID tid);
|
||||
|
||||
/** Is the CPU currently processing a HTM transaction? */
|
||||
bool executingHtmTransaction(ThreadID) const;
|
||||
|
||||
/* Reset HTM tracking, e.g. after an abort */
|
||||
void resetHtmStartsStops(ThreadID);
|
||||
|
||||
/** Ticks the commit stage, which tries to commit instructions. */
|
||||
void tick();
|
||||
|
||||
@@ -473,6 +479,11 @@ class DefaultCommit
|
||||
/** Updates commit stats based on this instruction. */
|
||||
void updateComInstStats(const DynInstPtr &inst);
|
||||
|
||||
|
||||
// HTM
|
||||
int htmStarts[Impl::MaxThreads];
|
||||
int htmStops[Impl::MaxThreads];
|
||||
|
||||
/** Stat for the total number of squashed instructions discarded by commit.
|
||||
*/
|
||||
Stats::Scalar commitSquashedInsts;
|
||||
|
||||
@@ -60,6 +60,7 @@
|
||||
#include "debug/CommitRate.hh"
|
||||
#include "debug/Drain.hh"
|
||||
#include "debug/ExecFaulting.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/faults.hh"
|
||||
@@ -121,6 +122,8 @@ DefaultCommit<Impl>::DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params)
|
||||
committedStores[tid] = false;
|
||||
checkEmptyROB[tid] = false;
|
||||
renameMap[tid] = nullptr;
|
||||
htmStarts[tid] = 0;
|
||||
htmStops[tid] = 0;
|
||||
}
|
||||
interrupt = NoFault;
|
||||
}
|
||||
@@ -404,6 +407,14 @@ DefaultCommit<Impl>::drainSanityCheck() const
|
||||
{
|
||||
assert(isDrained());
|
||||
rob->drainSanityCheck();
|
||||
|
||||
// hardware transactional memory
|
||||
// cannot drain partially through a transaction
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
if (executingHtmTransaction(tid)) {
|
||||
panic("cannot drain partially through a HTM transaction");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -462,6 +473,27 @@ DefaultCommit<Impl>::deactivateThread(ThreadID tid)
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultCommit<Impl>::executingHtmTransaction(ThreadID tid) const
|
||||
{
|
||||
if (tid == InvalidThreadID)
|
||||
return false;
|
||||
else
|
||||
return (htmStarts[tid] > htmStops[tid]);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::resetHtmStartsStops(ThreadID tid)
|
||||
{
|
||||
if (tid != InvalidThreadID)
|
||||
{
|
||||
htmStarts[tid] = 0;
|
||||
htmStops[tid] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
@@ -532,6 +564,14 @@ DefaultCommit<Impl>::generateTrapEvent(ThreadID tid, Fault inst_fault)
|
||||
Cycles latency = dynamic_pointer_cast<SyscallRetryFault>(inst_fault) ?
|
||||
cpu->syscallRetryLatency : trapLatency;
|
||||
|
||||
// hardware transactional memory
|
||||
if (inst_fault != nullptr &&
|
||||
std::dynamic_pointer_cast<GenericHtmFailureFault>(inst_fault)) {
|
||||
// TODO
|
||||
// latency = default abort/restore latency
|
||||
// could also do some kind of exponential back off if desired
|
||||
}
|
||||
|
||||
cpu->schedule(trap, cpu->clockEdge(latency));
|
||||
trapInFlight[tid] = true;
|
||||
thread[tid]->trapPending = true;
|
||||
@@ -991,13 +1031,28 @@ DefaultCommit<Impl>::commitInsts()
|
||||
// Commit as many instructions as possible until the commit bandwidth
|
||||
// limit is reached, or it becomes impossible to commit any more.
|
||||
while (num_committed < commitWidth) {
|
||||
// Check for any interrupt that we've already squashed for
|
||||
// and start processing it.
|
||||
if (interrupt != NoFault)
|
||||
handleInterrupt();
|
||||
// hardware transactional memory
|
||||
// If executing within a transaction,
|
||||
// need to handle interrupts specially
|
||||
|
||||
ThreadID commit_thread = getCommittingThread();
|
||||
|
||||
// Check for any interrupt that we've already squashed for
|
||||
// and start processing it.
|
||||
if (interrupt != NoFault) {
|
||||
// If inside a transaction, postpone interrupts
|
||||
if (executingHtmTransaction(commit_thread)) {
|
||||
cpu->clearInterrupts(0);
|
||||
toIEW->commitInfo[0].clearInterrupt = true;
|
||||
interrupt = NoFault;
|
||||
avoidQuiesceLiveLock = true;
|
||||
} else {
|
||||
handleInterrupt();
|
||||
}
|
||||
}
|
||||
|
||||
// ThreadID commit_thread = getCommittingThread();
|
||||
|
||||
if (commit_thread == -1 || !rob->isHeadReady(commit_thread))
|
||||
break;
|
||||
|
||||
@@ -1044,6 +1099,23 @@ DefaultCommit<Impl>::commitInsts()
|
||||
statCommittedInstType[tid][head_inst->opClass()]++;
|
||||
ppCommit->notify(head_inst);
|
||||
|
||||
// hardware transactional memory
|
||||
|
||||
// update nesting depth
|
||||
if (head_inst->isHtmStart())
|
||||
htmStarts[tid]++;
|
||||
|
||||
// sanity check
|
||||
if (head_inst->inHtmTransactionalState()) {
|
||||
assert(executingHtmTransaction(tid));
|
||||
} else {
|
||||
assert(!executingHtmTransaction(tid));
|
||||
}
|
||||
|
||||
// update nesting depth
|
||||
if (head_inst->isHtmStop())
|
||||
htmStops[tid]++;
|
||||
|
||||
changedROBNumEntries[tid] = true;
|
||||
|
||||
// Set the doneSeqNum to the youngest committed instruction.
|
||||
@@ -1206,6 +1278,23 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
|
||||
// Check if the instruction caused a fault. If so, trap.
|
||||
Fault inst_fault = head_inst->getFault();
|
||||
|
||||
// hardware transactional memory
|
||||
// if a fault occurred within a HTM transaction
|
||||
// ensure that the transaction aborts
|
||||
if (inst_fault != NoFault && head_inst->inHtmTransactionalState()) {
|
||||
// There exists a generic HTM fault common to all ISAs
|
||||
if (!std::dynamic_pointer_cast<GenericHtmFailureFault>(inst_fault)) {
|
||||
DPRINTF(HtmCpu, "%s - fault (%s) encountered within transaction"
|
||||
" - converting to GenericHtmFailureFault\n",
|
||||
head_inst->staticInst->getName(), inst_fault->name());
|
||||
inst_fault = std::make_shared<GenericHtmFailureFault>(
|
||||
head_inst->getHtmTransactionUid(),
|
||||
HtmFailureFaultCause::EXCEPTION);
|
||||
}
|
||||
// If this point is reached and the fault inherits from the HTM fault,
|
||||
// then there is no need to raise a new fault
|
||||
}
|
||||
|
||||
// Stores mark themselves as completed.
|
||||
if (!head_inst->isStore() && inst_fault == NoFault) {
|
||||
head_inst->setCompleted();
|
||||
@@ -1301,6 +1390,11 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
|
||||
head_inst->renamedDestRegIdx(i));
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
// the HTM UID is purely for correctness and debugging purposes
|
||||
if (head_inst->isHtmStart())
|
||||
iewStage->setLastRetiredHtmUid(tid, head_inst->getHtmTransactionUid());
|
||||
|
||||
// Finally clear the head ROB entry.
|
||||
rob->retireHead(tid);
|
||||
|
||||
|
||||
@@ -623,6 +623,10 @@ template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::deactivateThread(ThreadID tid)
|
||||
{
|
||||
// hardware transactional memory
|
||||
// shouldn't deactivate thread in the middle of a transaction
|
||||
assert(!commit.executingHtmTransaction(tid));
|
||||
|
||||
//Remove From Active List, if Active
|
||||
list<ThreadID>::iterator thread_it =
|
||||
std::find(activeThreads.begin(), activeThreads.end(), tid);
|
||||
@@ -1829,10 +1833,38 @@ FullO3CPU<Impl>::exitThreads()
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::htmSendAbortSignal(ThreadID tid, uint64_t htmUid,
|
||||
FullO3CPU<Impl>::htmSendAbortSignal(ThreadID tid, uint64_t htm_uid,
|
||||
HtmFailureFaultCause cause)
|
||||
{
|
||||
panic("not yet supported!");
|
||||
const Addr addr = 0x0ul;
|
||||
const int size = 8;
|
||||
const Request::Flags flags =
|
||||
Request::PHYSICAL|Request::STRICT_ORDER|Request::HTM_ABORT;
|
||||
|
||||
// O3-specific actions
|
||||
this->iew.ldstQueue.resetHtmStartsStops(tid);
|
||||
this->commit.resetHtmStartsStops(tid);
|
||||
|
||||
// notify l1 d-cache (ruby) that core has aborted transaction
|
||||
RequestPtr req =
|
||||
std::make_shared<Request>(addr, size, flags, _dataMasterId);
|
||||
|
||||
req->taskId(taskId());
|
||||
req->setContext(this->thread[tid]->contextId());
|
||||
req->setHtmAbortCause(cause);
|
||||
|
||||
assert(req->isHTMAbort());
|
||||
|
||||
PacketPtr abort_pkt = Packet::createRead(req);
|
||||
uint8_t *memData = new uint8_t[8];
|
||||
assert(memData);
|
||||
abort_pkt->dataStatic(memData);
|
||||
abort_pkt->setHtmTransactional(htm_uid);
|
||||
|
||||
// TODO include correct error handling here
|
||||
if (!this->iew.ldstQueue.getDataPort().sendTimingReq(abort_pkt)) {
|
||||
panic("HTM abort signal was not sent to the memory subsystem.");
|
||||
}
|
||||
}
|
||||
|
||||
// Forward declaration of FullO3CPU.
|
||||
|
||||
@@ -61,7 +61,6 @@
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/simple_thread.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
//#include "cpu/o3/thread_context.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/process.hh"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2012, 2014 ARM Limited
|
||||
* Copyright (c) 2010-2012, 2014, 2019 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -233,6 +233,16 @@ class DefaultIEW
|
||||
/** Check misprediction */
|
||||
void checkMisprediction(const DynInstPtr &inst);
|
||||
|
||||
// hardware transactional memory
|
||||
// For debugging purposes, it is useful to keep track of the most recent
|
||||
// htmUid that has been committed (architecturally, not transactionally)
|
||||
// to ensure that the core and the memory subsystem are observing
|
||||
// correct ordering constraints.
|
||||
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
|
||||
{
|
||||
ldstQueue.setLastRetiredHtmUid(tid, htmUid);
|
||||
}
|
||||
|
||||
private:
|
||||
/** Sends commit proper information for a squash due to a branch
|
||||
* mispredict.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2013, 2018 ARM Limited
|
||||
* Copyright (c) 2010-2013, 2018-2019 ARM Limited
|
||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
@@ -1051,6 +1051,20 @@ DefaultIEW<Impl>::dispatchInsts(ThreadID tid)
|
||||
break;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
// CPU needs to track transactional state in program order.
|
||||
const int numHtmStarts = ldstQueue.numHtmStarts(tid);
|
||||
const int numHtmStops = ldstQueue.numHtmStops(tid);
|
||||
const int htmDepth = numHtmStarts - numHtmStops;
|
||||
|
||||
if (htmDepth > 0) {
|
||||
inst->setHtmTransactionalState(ldstQueue.getLatestHtmUid(tid),
|
||||
htmDepth);
|
||||
} else {
|
||||
inst->clearHtmTransactionalState();
|
||||
}
|
||||
|
||||
|
||||
// Otherwise issue the instruction just fine.
|
||||
if (inst->isAtomic()) {
|
||||
DPRINTF(IEW, "[tid:%i] Issue: Memory instruction "
|
||||
|
||||
@@ -687,6 +687,8 @@ class LSQ
|
||||
{
|
||||
flags.set(Flag::Complete);
|
||||
}
|
||||
|
||||
virtual std::string name() const { return "LSQRequest"; }
|
||||
};
|
||||
|
||||
class SingleDataRequest : public LSQRequest
|
||||
@@ -739,6 +741,35 @@ class LSQ
|
||||
virtual void buildPackets();
|
||||
virtual Cycles handleLocalAccess(ThreadContext *thread, PacketPtr pkt);
|
||||
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
|
||||
virtual std::string name() const { return "SingleDataRequest"; }
|
||||
};
|
||||
|
||||
// hardware transactional memory
|
||||
// This class extends SingleDataRequest for the sole purpose
|
||||
// of encapsulating hardware transactional memory command requests
|
||||
class HtmCmdRequest : public SingleDataRequest
|
||||
{
|
||||
protected:
|
||||
/* Given that we are inside templates, children need explicit
|
||||
* declaration of the names in the parent class. */
|
||||
using Flag = typename LSQRequest::Flag;
|
||||
using State = typename LSQRequest::State;
|
||||
using LSQRequest::_addr;
|
||||
using LSQRequest::_size;
|
||||
using LSQRequest::_byteEnable;
|
||||
using LSQRequest::_requests;
|
||||
using LSQRequest::_inst;
|
||||
using LSQRequest::_taskId;
|
||||
using LSQRequest::flags;
|
||||
using LSQRequest::setState;
|
||||
public:
|
||||
HtmCmdRequest(LSQUnit* port, const DynInstPtr& inst,
|
||||
const Request::Flags& flags_);
|
||||
inline virtual ~HtmCmdRequest() {}
|
||||
virtual void initiateTranslation();
|
||||
virtual void finish(const Fault &fault, const RequestPtr &req,
|
||||
ThreadContext* tc, BaseTLB::Mode mode);
|
||||
virtual std::string name() const { return "HtmCmdRequest"; }
|
||||
};
|
||||
|
||||
class SplitDataRequest : public LSQRequest
|
||||
@@ -815,6 +846,7 @@ class LSQ
|
||||
|
||||
virtual RequestPtr mainRequest();
|
||||
virtual PacketPtr mainPacket();
|
||||
virtual std::string name() const { return "SplitDataRequest"; }
|
||||
};
|
||||
|
||||
/** Constructs an LSQ with the given parameters. */
|
||||
@@ -933,6 +965,44 @@ class LSQ
|
||||
/** Returns the total number of stores for a single thread. */
|
||||
int numStores(ThreadID tid) { return thread.at(tid).numStores(); }
|
||||
|
||||
|
||||
// hardware transactional memory
|
||||
|
||||
/** Number of HTM starts tracked by the thread's LSQ unit; 0 for an
 * invalid thread id. */
int numHtmStarts(ThreadID tid) const
{
    return (tid == InvalidThreadID) ? 0 : thread[tid].numHtmStarts();
}
|
||||
/** Number of HTM stops tracked by the thread's LSQ unit; 0 for an
 * invalid thread id. */
int numHtmStops(ThreadID tid) const
{
    return (tid == InvalidThreadID) ? 0 : thread[tid].numHtmStops();
}
|
||||
|
||||
/** Resets the HTM start/stop counters of the thread's LSQ unit.
 * Ignored for an invalid thread id. */
void resetHtmStartsStops(ThreadID tid)
{
    if (tid == InvalidThreadID)
        return;

    thread[tid].resetHtmStartsStops();
}
|
||||
|
||||
/** Latest HTM uid as reported by the thread's LSQ unit; 0 for an
 * invalid thread id. */
uint64_t getLatestHtmUid(ThreadID tid) const
{
    if (tid != InvalidThreadID)
        return thread[tid].getLatestHtmUid();

    return 0;
}
|
||||
|
||||
/** Forwards the last retired HTM uid to the thread's LSQ unit.
 * Ignored for an invalid thread id. */
void setLastRetiredHtmUid(ThreadID tid, uint64_t htmUid)
{
    if (tid == InvalidThreadID)
        return;

    thread[tid].setLastRetiredHtmUid(htmUid);
}
|
||||
|
||||
/** Returns the number of free load entries. */
|
||||
unsigned numFreeLoadEntries();
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
|
||||
* Copyright (c) 2011-2012, 2014, 2017-2019 ARM Limited
|
||||
* Copyright (c) 2013 Advanced Micro Devices, Inc.
|
||||
* All rights reserved
|
||||
*
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "debug/Drain.hh"
|
||||
#include "debug/Fetch.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
#include "debug/LSQ.hh"
|
||||
#include "debug/Writeback.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
@@ -706,11 +707,17 @@ LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
|
||||
// lines. For now, such cross-line update is not supported.
|
||||
assert(!isAtomic || (isAtomic && !needs_burst));
|
||||
|
||||
const bool htm_cmd = isLoad && (flags & Request::HTM_CMD);
|
||||
|
||||
if (inst->translationStarted()) {
|
||||
req = inst->savedReq;
|
||||
assert(req);
|
||||
} else {
|
||||
if (needs_burst) {
|
||||
if (htm_cmd) {
|
||||
assert(addr == 0x0lu);
|
||||
assert(size == 8);
|
||||
req = new HtmCmdRequest(&thread[tid], inst, flags);
|
||||
} else if (needs_burst) {
|
||||
req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
|
||||
size, flags, data, res);
|
||||
} else {
|
||||
@@ -1033,6 +1040,23 @@ LSQ<Impl>::SingleDataRequest::buildPackets()
|
||||
: Packet::createWrite(request()));
|
||||
_packets.back()->dataStatic(_inst->memData);
|
||||
_packets.back()->senderState = _senderState;
|
||||
|
||||
// hardware transactional memory
|
||||
// If request originates in a transaction (not necessarily a HtmCmd),
|
||||
// then the packet should be marked as such.
|
||||
if (_inst->inHtmTransactionalState()) {
|
||||
_packets.back()->setHtmTransactional(
|
||||
_inst->getHtmTransactionUid());
|
||||
|
||||
DPRINTF(HtmCpu,
|
||||
"HTM %s pc=0x%lx - vaddr=0x%lx - paddr=0x%lx - htmUid=%u\n",
|
||||
isLoad() ? "LD" : "ST",
|
||||
_inst->instAddr(),
|
||||
_packets.back()->req->hasVaddr() ?
|
||||
_packets.back()->req->getVaddr() : 0lu,
|
||||
_packets.back()->getAddr(),
|
||||
_inst->getHtmTransactionUid());
|
||||
}
|
||||
}
|
||||
assert(_packets.size() == 1);
|
||||
}
|
||||
@@ -1049,6 +1073,21 @@ LSQ<Impl>::SplitDataRequest::buildPackets()
|
||||
if (isLoad()) {
|
||||
_mainPacket = Packet::createRead(mainReq);
|
||||
_mainPacket->dataStatic(_inst->memData);
|
||||
|
||||
// hardware transactional memory
|
||||
// If request originates in a transaction,
|
||||
// packet should be marked as such
|
||||
if (_inst->inHtmTransactionalState()) {
|
||||
_mainPacket->setHtmTransactional(
|
||||
_inst->getHtmTransactionUid());
|
||||
DPRINTF(HtmCpu,
|
||||
"HTM LD.0 pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
|
||||
_inst->instAddr(),
|
||||
_mainPacket->req->hasVaddr() ?
|
||||
_mainPacket->req->getVaddr() : 0lu,
|
||||
_mainPacket->getAddr(),
|
||||
_inst->getHtmTransactionUid());
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < _requests.size() && _fault[i] == NoFault; i++) {
|
||||
RequestPtr r = _requests[i];
|
||||
@@ -1066,6 +1105,23 @@ LSQ<Impl>::SplitDataRequest::buildPackets()
|
||||
}
|
||||
pkt->senderState = _senderState;
|
||||
_packets.push_back(pkt);
|
||||
|
||||
// hardware transactional memory
|
||||
// If request originates in a transaction,
|
||||
// packet should be marked as such
|
||||
if (_inst->inHtmTransactionalState()) {
|
||||
_packets.back()->setHtmTransactional(
|
||||
_inst->getHtmTransactionUid());
|
||||
DPRINTF(HtmCpu,
|
||||
"HTM %s.%d pc=0x%lx-vaddr=0x%lx-paddr=0x%lx-htmUid=%u\n",
|
||||
isLoad() ? "LD" : "ST",
|
||||
i+1,
|
||||
_inst->instAddr(),
|
||||
_packets.back()->req->hasVaddr() ?
|
||||
_packets.back()->req->getVaddr() : 0lu,
|
||||
_packets.back()->getAddr(),
|
||||
_inst->getHtmTransactionUid());
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(_packets.size() > 0);
|
||||
@@ -1192,4 +1248,59 @@ LSQ<Impl>::DcachePort::recvReqRetry()
|
||||
lsq->recvReqRetry();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
LSQ<Impl>::HtmCmdRequest::HtmCmdRequest(LSQUnit* port,
|
||||
const DynInstPtr& inst,
|
||||
const Request::Flags& flags_) :
|
||||
SingleDataRequest(port, inst, true, 0x0lu, 8, flags_,
|
||||
nullptr, nullptr, nullptr)
|
||||
{
|
||||
assert(_requests.size() == 0);
|
||||
|
||||
this->addRequest(_addr, _size, _byteEnable);
|
||||
|
||||
if (_requests.size() > 0) {
|
||||
_requests.back()->setReqInstSeqNum(_inst->seqNum);
|
||||
_requests.back()->taskId(_taskId);
|
||||
_requests.back()->setPaddr(_addr);
|
||||
_requests.back()->setInstCount(_inst->getCpuPtr()->totalInsts());
|
||||
|
||||
_inst->strictlyOrdered(_requests.back()->isStrictlyOrdered());
|
||||
_inst->fault = NoFault;
|
||||
_inst->physEffAddr = _requests.back()->getPaddr();
|
||||
_inst->memReqFlags = _requests.back()->getFlags();
|
||||
_inst->savedReq = this;
|
||||
|
||||
setState(State::Translation);
|
||||
} else {
|
||||
panic("unexpected behaviour");
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::HtmCmdRequest::initiateTranslation()
|
||||
{
|
||||
// Transaction commands are implemented as loads to avoid significant
|
||||
// changes to the cpu and memory interfaces
|
||||
// The virtual and physical address uses a dummy value of 0x00
|
||||
// Address translation does not really occur thus the code below
|
||||
|
||||
flags.set(Flag::TranslationStarted);
|
||||
flags.set(Flag::TranslationFinished);
|
||||
|
||||
_inst->translationStarted(true);
|
||||
_inst->translationCompleted(true);
|
||||
|
||||
setState(State::Request);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::HtmCmdRequest::finish(const Fault &fault, const RequestPtr &req,
|
||||
ThreadContext* tc, BaseTLB::Mode mode)
|
||||
{
|
||||
panic("unexpected behaviour");
|
||||
}
|
||||
|
||||
#endif//__CPU_O3_LSQ_IMPL_HH__
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
#include "debug/LSQUnit.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/port.hh"
|
||||
@@ -312,6 +313,21 @@ class LSQUnit
|
||||
/** Returns the number of stores in the SQ. */
|
||||
int numStores() { return stores; }
|
||||
|
||||
// hardware transactional memory
|
||||
/** Returns the htmStarts counter of this LSQ unit. */
int
numHtmStarts() const
{
    return htmStarts;
}
|
||||
/** Returns the htmStops counter of this LSQ unit. */
int
numHtmStops() const
{
    return htmStops;
}
|
||||
/** Zeroes both the htmStarts and htmStops counters. */
void
resetHtmStartsStops()
{
    htmStops = 0;
    htmStarts = 0;
}
|
||||
uint64_t getLatestHtmUid() const
|
||||
{
|
||||
const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
|
||||
return htm_cpt->getHtmUid();
|
||||
}
|
||||
void setLastRetiredHtmUid(uint64_t htm_uid)
|
||||
{
|
||||
assert(htm_uid >= lastRetiredHtmUid);
|
||||
lastRetiredHtmUid = htm_uid;
|
||||
}
|
||||
|
||||
/** Returns if either the LQ or SQ is full. */
|
||||
bool isFull() { return lqFull() || sqFull(); }
|
||||
|
||||
@@ -496,6 +512,13 @@ class LSQUnit
|
||||
/** The number of store instructions in the SQ waiting to writeback. */
|
||||
int storesToWB;
|
||||
|
||||
// hardware transactional memory
|
||||
// nesting depth
|
||||
int htmStarts;
|
||||
int htmStops;
|
||||
// sanity checks and debugging
|
||||
uint64_t lastRetiredHtmUid;
|
||||
|
||||
/** The index of the first instruction that may be ready to be
|
||||
* written back, and has not yet been written back.
|
||||
*/
|
||||
@@ -665,6 +688,7 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
|
||||
if (req->mainRequest()->isLocalAccess()) {
|
||||
assert(!load_inst->memData);
|
||||
assert(!load_inst->inHtmTransactionalState());
|
||||
load_inst->memData = new uint8_t[MaxDataBytes];
|
||||
|
||||
ThreadContext *thread = cpu->tcBase(lsqID);
|
||||
@@ -679,6 +703,37 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMStart() || req->mainRequest()->isHTMCommit())
|
||||
{
|
||||
// don't want to send nested transactionStarts and
|
||||
// transactionStops outside of core, e.g. to Ruby
|
||||
if (req->mainRequest()->getFlags().isSet(Request::NO_ACCESS)) {
|
||||
Cycles delay(0);
|
||||
PacketPtr data_pkt =
|
||||
new Packet(req->mainRequest(), MemCmd::ReadReq);
|
||||
|
||||
// Allocate memory if this is the first time a load is issued.
|
||||
if (!load_inst->memData) {
|
||||
load_inst->memData =
|
||||
new uint8_t[req->mainRequest()->getSize()];
|
||||
// sanity checks expect zero in request's data
|
||||
memset(load_inst->memData, 0, req->mainRequest()->getSize());
|
||||
}
|
||||
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
data_pkt->makeResponse();
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
|
||||
cpu->schedule(wb, cpu->clockEdge(delay));
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
|
||||
// Check the SQ for any previous stores that might lead to forwarding
|
||||
auto store_it = load_inst->sqIt;
|
||||
assert (store_it >= storeWBIt);
|
||||
@@ -771,6 +826,35 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
MemCmd::ReadReq);
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
// hardware transactional memory
|
||||
// Store to load forwarding within a transaction
|
||||
// This should be okay because the store will be sent to
|
||||
// the memory subsystem and subsequently get added to the
|
||||
// write set of the transaction. The write set has a stronger
|
||||
// property than the read set, so the load doesn't necessarily
|
||||
// have to be there.
|
||||
assert(!req->mainRequest()->isHTMCmd());
|
||||
if (load_inst->inHtmTransactionalState()) {
|
||||
assert (!storeQueue[store_it._idx].completed());
|
||||
assert (
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
inHtmTransactionalState());
|
||||
assert (
|
||||
load_inst->getHtmTransactionUid() ==
|
||||
storeQueue[store_it._idx].instruction()->
|
||||
getHtmTransactionUid());
|
||||
data_pkt->setHtmTransactional(
|
||||
load_inst->getHtmTransactionUid());
|
||||
DPRINTF(HtmCpu, "HTM LD (ST2LDF) "
|
||||
"pc=0x%lx - vaddr=0x%lx - "
|
||||
"paddr=0x%lx - htmUid=%u\n",
|
||||
load_inst->instAddr(),
|
||||
data_pkt->req->hasVaddr() ?
|
||||
data_pkt->req->getVaddr() : 0lu,
|
||||
data_pkt->getAddr(),
|
||||
load_inst->getHtmTransactionUid());
|
||||
}
|
||||
|
||||
if (req->isAnyOutstandingRequest()) {
|
||||
assert(req->_numOutstandingPackets > 0);
|
||||
// There are memory requests packets in flight already.
|
||||
@@ -841,6 +925,15 @@ LSQUnit<Impl>::read(LSQRequest *req, int load_idx)
|
||||
load_inst->memData = new uint8_t[req->mainRequest()->getSize()];
|
||||
}
|
||||
|
||||
|
||||
// hardware transactional memory
|
||||
if (req->mainRequest()->isHTMCmd()) {
|
||||
// this is a simple sanity check
|
||||
// the Ruby cache controller will set
|
||||
// memData to 0x0ul if successful.
|
||||
*load_inst->memData = (uint64_t) 0x1ull;
|
||||
}
|
||||
|
||||
// For now, load throughput is constrained by the number of
|
||||
// load FUs only, and loads do not consume a cache port (only
|
||||
// stores do).
|
||||
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "cpu/o3/lsq_unit.hh"
|
||||
#include "debug/Activity.hh"
|
||||
#include "debug/HtmCpu.hh"
|
||||
#include "debug/IEW.hh"
|
||||
#include "debug/LSQUnit.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
@@ -112,6 +113,59 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||
LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
|
||||
DynInstPtr inst = state->inst;
|
||||
|
||||
// hardware transactional memory
|
||||
// sanity check
|
||||
if (pkt->isHtmTransactional() && !inst->isSquashed()) {
|
||||
assert(inst->getHtmTransactionUid() == pkt->getHtmTransactionUid());
|
||||
}
|
||||
|
||||
// if in a HTM transaction, it's possible
|
||||
// to abort within the cache hierarchy.
|
||||
// This is signalled back to the processor
|
||||
// through responses to memory requests.
|
||||
if (pkt->htmTransactionFailedInCache()) {
|
||||
// cannot do this for write requests because
|
||||
// they cannot tolerate faults
|
||||
const HtmCacheFailure htm_rc =
|
||||
pkt->getHtmTransactionFailedInCacheRC();
|
||||
if(pkt->isWrite()) {
|
||||
DPRINTF(HtmCpu,
|
||||
"store notification (ignored) of HTM transaction failure "
|
||||
"in cache - addr=0x%lx - rc=%s - htmUid=%d\n",
|
||||
pkt->getAddr(), htmFailureToStr(htm_rc),
|
||||
pkt->getHtmTransactionUid());
|
||||
} else {
|
||||
HtmFailureFaultCause fail_reason =
|
||||
HtmFailureFaultCause::INVALID;
|
||||
|
||||
if (htm_rc == HtmCacheFailure::FAIL_SELF) {
|
||||
fail_reason = HtmFailureFaultCause::SIZE;
|
||||
} else if (htm_rc == HtmCacheFailure::FAIL_REMOTE) {
|
||||
fail_reason = HtmFailureFaultCause::MEMORY;
|
||||
} else if (htm_rc == HtmCacheFailure::FAIL_OTHER) {
|
||||
// these are likely loads that were issued out of order
|
||||
// they are faulted here, but it's unlikely that these will
|
||||
// ever reach the commit head.
|
||||
fail_reason = HtmFailureFaultCause::OTHER;
|
||||
} else {
|
||||
panic("HTM error - unhandled return code from cache (%s)",
|
||||
htmFailureToStr(htm_rc));
|
||||
}
|
||||
|
||||
inst->fault =
|
||||
std::make_shared<GenericHtmFailureFault>(
|
||||
inst->getHtmTransactionUid(),
|
||||
fail_reason);
|
||||
|
||||
DPRINTF(HtmCpu,
|
||||
"load notification of HTM transaction failure "
|
||||
"in cache - pc=%s - addr=0x%lx - "
|
||||
"rc=%u - htmUid=%d\n",
|
||||
inst->pcState(), pkt->getAddr(),
|
||||
htmFailureToStr(htm_rc), pkt->getHtmTransactionUid());
|
||||
}
|
||||
}
|
||||
|
||||
cpu->ppDataAccessComplete->notify(std::make_pair(inst, pkt));
|
||||
|
||||
/* Notify the sender state that the access is complete (for ownership
|
||||
@@ -125,6 +179,13 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||
// after receiving the response from the memory
|
||||
assert(inst->isLoad() || inst->isStoreConditional() ||
|
||||
inst->isAtomic());
|
||||
|
||||
// hardware transactional memory
|
||||
if (pkt->htmTransactionFailedInCache()) {
|
||||
state->request()->mainPacket()->setHtmTransactionFailedInCache(
|
||||
pkt->getHtmTransactionFailedInCacheRC() );
|
||||
}
|
||||
|
||||
writeback(inst, state->request()->mainPacket());
|
||||
if (inst->isStore() || inst->isAtomic()) {
|
||||
auto ss = dynamic_cast<SQSenderState*>(state);
|
||||
@@ -142,7 +203,10 @@ LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
|
||||
template <class Impl>
|
||||
LSQUnit<Impl>::LSQUnit(uint32_t lqEntries, uint32_t sqEntries)
|
||||
: lsqID(-1), storeQueue(sqEntries+1), loadQueue(lqEntries+1),
|
||||
loads(0), stores(0), storesToWB(0), cacheBlockMask(0), stalled(false),
|
||||
loads(0), stores(0), storesToWB(0),
|
||||
htmStarts(0), htmStops(0),
|
||||
lastRetiredHtmUid(0),
|
||||
cacheBlockMask(0), stalled(false),
|
||||
isStoreBlocked(false), storeInFlight(false), hasPendingRequest(false),
|
||||
pendingRequest(nullptr)
|
||||
{
|
||||
@@ -176,6 +240,9 @@ LSQUnit<Impl>::resetState()
|
||||
{
|
||||
loads = stores = storesToWB = 0;
|
||||
|
||||
// hardware transactional memory
|
||||
// nesting depth
|
||||
htmStarts = htmStops = 0;
|
||||
|
||||
storeWBIt = storeQueue.begin();
|
||||
|
||||
@@ -306,6 +373,45 @@ LSQUnit<Impl>::insertLoad(const DynInstPtr &load_inst)
|
||||
load_inst->lqIt = loadQueue.getIterator(load_inst->lqIdx);
|
||||
|
||||
++loads;
|
||||
|
||||
// hardware transactional memory
|
||||
// transactional state and nesting depth must be tracked
|
||||
// in the in-order part of the core.
|
||||
if (load_inst->isHtmStart()) {
|
||||
htmStarts++;
|
||||
DPRINTF(HtmCpu, ">> htmStarts++ (%d) : htmStops (%d)\n",
|
||||
htmStarts, htmStops);
|
||||
|
||||
const int htm_depth = htmStarts - htmStops;
|
||||
const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
|
||||
auto htm_uid = htm_cpt->getHtmUid();
|
||||
|
||||
// for debugging purposes
|
||||
if (!load_inst->inHtmTransactionalState()) {
|
||||
htm_uid = htm_cpt->newHtmUid();
|
||||
DPRINTF(HtmCpu, "generating new htmUid=%u\n", htm_uid);
|
||||
if (htm_depth != 1) {
|
||||
DPRINTF(HtmCpu,
|
||||
"unusual HTM transactional depth (%d)"
|
||||
" possibly caused by mispeculation - htmUid=%u\n",
|
||||
htm_depth, htm_uid);
|
||||
}
|
||||
}
|
||||
load_inst->setHtmTransactionalState(htm_uid, htm_depth);
|
||||
}
|
||||
|
||||
if (load_inst->isHtmStop()) {
|
||||
htmStops++;
|
||||
DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops++ (%d)\n",
|
||||
htmStarts, htmStops);
|
||||
|
||||
if (htmStops==1 && htmStarts==0) {
|
||||
DPRINTF(HtmCpu,
|
||||
"htmStops==1 && htmStarts==0. "
|
||||
"This generally shouldn't happen "
|
||||
"(unless due to misspeculation)\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@@ -831,6 +937,7 @@ LSQUnit<Impl>::writebackStores()
|
||||
|
||||
if (req->request()->isLocalAccess()) {
|
||||
assert(!inst->isStoreConditional());
|
||||
assert(!inst->inHtmTransactionalState());
|
||||
ThreadContext *thread = cpu->tcBase(lsqID);
|
||||
PacketPtr main_pkt = new Packet(req->mainRequest(),
|
||||
MemCmd::WriteReq);
|
||||
@@ -876,6 +983,21 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
|
||||
stallingLoadIdx = 0;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
// Squashing instructions can alter the transaction nesting depth
|
||||
// so the depth must be corrected before fetching resumes.
|
||||
if (loadQueue.back().instruction()->isHtmStart())
|
||||
{
|
||||
htmStarts = (--htmStarts < 0) ? 0 : htmStarts;
|
||||
DPRINTF(HtmCpu, ">> htmStarts-- (%d) : htmStops (%d)\n",
|
||||
htmStarts, htmStops);
|
||||
}
|
||||
if (loadQueue.back().instruction()->isHtmStop())
|
||||
{
|
||||
htmStops = (--htmStops < 0) ? 0 : htmStops;
|
||||
DPRINTF(HtmCpu, ">> htmStarts (%d) : htmStops-- (%d)\n",
|
||||
htmStarts, htmStops);
|
||||
}
|
||||
// Clear the smart pointer to make sure it is decremented.
|
||||
loadQueue.back().instruction()->setSquashed();
|
||||
loadQueue.back().clear();
|
||||
@@ -886,6 +1008,40 @@ LSQUnit<Impl>::squash(const InstSeqNum &squashed_num)
|
||||
++lsqSquashedLoads;
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
// scan load queue (from oldest to youngest) for most recent valid htmUid
|
||||
auto scan_it = loadQueue.begin();
|
||||
uint64_t in_flight_uid = 0;
|
||||
while (scan_it != loadQueue.end()) {
|
||||
if (scan_it->instruction()->isHtmStart() &&
|
||||
!scan_it->instruction()->isSquashed()) {
|
||||
in_flight_uid = scan_it->instruction()->getHtmTransactionUid();
|
||||
DPRINTF(HtmCpu, "loadQueue[%d]: found valid HtmStart htmUid=%u\n",
|
||||
scan_it._idx, in_flight_uid);
|
||||
}
|
||||
scan_it++;
|
||||
}
|
||||
// If there's a HtmStart in the pipeline then use its htmUid,
|
||||
// otherwise use the most recently committed uid
|
||||
const auto& htm_cpt = cpu->tcBase(lsqID)->getHtmCheckpointPtr();
|
||||
if (htm_cpt) {
|
||||
const uint64_t old_local_htm_uid = htm_cpt->getHtmUid();
|
||||
uint64_t new_local_htm_uid;
|
||||
if (in_flight_uid > 0)
|
||||
new_local_htm_uid = in_flight_uid;
|
||||
else
|
||||
new_local_htm_uid = lastRetiredHtmUid;
|
||||
|
||||
if (old_local_htm_uid != new_local_htm_uid) {
|
||||
DPRINTF(HtmCpu, "flush: lastRetiredHtmUid=%u\n",
|
||||
lastRetiredHtmUid);
|
||||
DPRINTF(HtmCpu, "flush: resetting localHtmUid=%u\n",
|
||||
new_local_htm_uid);
|
||||
|
||||
htm_cpt->setHtmUid(new_local_htm_uid);
|
||||
}
|
||||
}
|
||||
|
||||
if (memDepViolator && squashed_num < memDepViolator->seqNum) {
|
||||
memDepViolator = NULL;
|
||||
}
|
||||
@@ -965,7 +1121,7 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
|
||||
|
||||
// Squashed instructions do not need to complete their access.
|
||||
if (inst->isSquashed()) {
|
||||
assert(!inst->isStore());
|
||||
assert (!inst->isStore() || inst->isStoreConditional());
|
||||
++lsqIgnoredResponses;
|
||||
return;
|
||||
}
|
||||
@@ -983,8 +1139,27 @@ LSQUnit<Impl>::writeback(const DynInstPtr &inst, PacketPtr pkt)
|
||||
// If we have an outstanding fault, the fault should only be of
|
||||
// type ReExec or - in case of a SplitRequest - a partial
|
||||
// translation fault
|
||||
assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
|
||||
inst->savedReq->isPartialFault());
|
||||
|
||||
// Unless it's a hardware transactional memory fault
|
||||
auto htm_fault = std::dynamic_pointer_cast<
|
||||
GenericHtmFailureFault>(inst->fault);
|
||||
|
||||
if (!htm_fault) {
|
||||
assert(dynamic_cast<ReExec*>(inst->fault.get()) != nullptr ||
|
||||
inst->savedReq->isPartialFault());
|
||||
|
||||
} else if (!pkt->htmTransactionFailedInCache()) {
|
||||
// Situation in which the instruction has a hardware transactional
|
||||
// memory fault but not the packet itself. This can occur with
|
||||
// ldp_uop microops since access is spread over multiple packets.
|
||||
DPRINTF(HtmCpu,
|
||||
"%s writeback with HTM failure fault, "
|
||||
"however, completing packet is not aware of "
|
||||
"transaction failure. cause=%s htmUid=%u\n",
|
||||
inst->staticInst->getName(),
|
||||
htmFailureToStr(htm_fault->getHtmFailureFaultCause()),
|
||||
htm_fault->getHtmUid());
|
||||
}
|
||||
|
||||
DPRINTF(LSQUnit, "Not completing instruction [sn:%lli] access "
|
||||
"due to pending fault.\n", inst->seqNum);
|
||||
|
||||
@@ -172,7 +172,9 @@ MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
|
||||
{
|
||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||
// Memory barriers block loads and stores, write barriers only stores.
|
||||
if (barr_inst->isMemBarrier()) {
|
||||
// Required also for hardware transactional memory commands which
|
||||
// can have strict ordering semantics
|
||||
if (barr_inst->isMemBarrier() || barr_inst->isHtmCmd()) {
|
||||
loadBarrierSNs.insert(barr_sn);
|
||||
storeBarrierSNs.insert(barr_sn);
|
||||
DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
|
||||
@@ -182,6 +184,7 @@ MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
|
||||
DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n",
|
||||
barr_inst->pcState(), barr_sn);
|
||||
}
|
||||
|
||||
if (loadBarrierSNs.size() || storeBarrierSNs.size()) {
|
||||
DPRINTF(MemDepUnit, "Outstanding load barriers = %d; "
|
||||
"store barriers = %d\n",
|
||||
@@ -440,7 +443,8 @@ MemDepUnit<MemDepPred, Impl>::completeInst(const DynInstPtr &inst)
|
||||
wakeDependents(inst);
|
||||
completed(inst);
|
||||
InstSeqNum barr_sn = inst->seqNum;
|
||||
if (inst->isMemBarrier()) {
|
||||
|
||||
if (inst->isMemBarrier() || inst->isHtmCmd()) {
|
||||
assert(hasLoadBarrier());
|
||||
assert(hasStoreBarrier());
|
||||
loadBarrierSNs.erase(barr_sn);
|
||||
@@ -459,9 +463,10 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
|
||||
{
|
||||
// Only stores, atomics and barriers have dependents.
|
||||
// Only stores, atomics, barriers and
|
||||
// hardware transactional memory commands have dependents.
|
||||
if (!inst->isStore() && !inst->isAtomic() && !inst->isMemBarrier() &&
|
||||
!inst->isWriteBarrier()) {
|
||||
!inst->isWriteBarrier() && !inst->isHtmCmd()) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -331,21 +331,24 @@ void
|
||||
O3ThreadContext<Impl>::htmAbortTransaction(uint64_t htmUid,
|
||||
HtmFailureFaultCause cause)
|
||||
{
|
||||
panic("function not implemented\n");
|
||||
cpu->htmSendAbortSignal(thread->threadId(), htmUid, cause);
|
||||
|
||||
conditionalSquash();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
BaseHTMCheckpointPtr&
|
||||
O3ThreadContext<Impl>::getHtmCheckpointPtr()
|
||||
{
|
||||
panic("function not implemented\n");
|
||||
return thread->htmCheckpoint;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setHtmCheckpointPtr(BaseHTMCheckpointPtr new_cpt)
|
||||
{
|
||||
panic("function not implemented\n");
|
||||
assert(!thread->htmCheckpoint->valid());
|
||||
thread->htmCheckpoint = std::move(new_cpt);
|
||||
}
|
||||
|
||||
#endif //__CPU_O3_THREAD_CONTEXT_IMPL_HH__
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012 ARM Limited
|
||||
* Copyright (c) 2012, 2019 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -92,6 +92,9 @@ struct O3ThreadState : public ThreadState {
|
||||
*/
|
||||
bool trapPending;
|
||||
|
||||
/** Pointer to the hardware transactional memory checkpoint. */
|
||||
std::unique_ptr<BaseHTMCheckpoint> htmCheckpoint;
|
||||
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process)
|
||||
: ThreadState(_cpu, _thread_num, _process), cpu(_cpu),
|
||||
comInstEventQueue("instruction-based event queue"),
|
||||
|
||||
Reference in New Issue
Block a user