cpu: fixed how O3 CPU executes an exit system call
When a thread executed an exit syscall in SE mode, the thread context was removed immediately in the same cycle, which left in-flight squash operations and the trap event incomplete. The problem surfaced when a new thread was later assigned to the CPU: the new thread started with some incomplete transactions of the previous thread (e.g., squashing). This could cause incorrect execution flow for the new thread (i.e., the pc was not reset properly at the exit point), deadlock (i.e., some stage-to-stage signals were not reset), and an incorrect rename map between logical and physical registers. This patch adds a new state called 'Halting' to the thread context and defers removing the thread context from a CPU until the trap event initiated by an exit syscall execution is processed. This patch also makes sure that the removal of a thread context happens only after all in-flight transactions of the to-be-removed thread in the pipeline complete. Change-Id: If7ef1462fb8864e22b45371ee7ae67e2a5ad38b8 Reviewed-on: https://gem5-review.googlesource.com/c/8184 Reviewed-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com> Maintainer: Jason Lowe-Power <jason@lowepower.com>
This commit is contained in:
@@ -187,6 +187,9 @@ class DefaultCommit
|
||||
/** Initializes stage by sending back the number of free entries. */
|
||||
void startupStage();
|
||||
|
||||
/** Clear all thread-specific states */
|
||||
void clearStates(ThreadID tid);
|
||||
|
||||
/** Initializes the draining of commit. */
|
||||
void drain();
|
||||
|
||||
|
||||
@@ -364,6 +364,22 @@ DefaultCommit<Impl>::startupStage()
|
||||
cpu->activityThisCycle();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
commitStatus[tid] = Idle;
|
||||
changedROBNumEntries[tid] = false;
|
||||
checkEmptyROB[tid] = false;
|
||||
trapInFlight[tid] = false;
|
||||
committedStores[tid] = false;
|
||||
trapSquash[tid] = false;
|
||||
tcSquash[tid] = false;
|
||||
pc[tid].set(0);
|
||||
lastCommitedSeqNum[tid] = 0;
|
||||
squashAfterInst[tid] = NULL;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultCommit<Impl>::drain()
|
||||
@@ -813,6 +829,13 @@ DefaultCommit<Impl>::commit()
|
||||
if (trapSquash[tid]) {
|
||||
assert(!tcSquash[tid]);
|
||||
squashFromTrap(tid);
|
||||
|
||||
// If the thread is trying to exit (i.e., an exit syscall was
|
||||
// executed), this trapSquash was originated by the exit
|
||||
// syscall earlier. In this case, schedule an exit event in
|
||||
// the next cycle to fully terminate this thread
|
||||
if (cpu->isThreadExiting(tid))
|
||||
cpu->scheduleThreadExitEvent(tid);
|
||||
} else if (tcSquash[tid]) {
|
||||
assert(commitStatus[tid] != TrapPending);
|
||||
squashFromTC(tid);
|
||||
|
||||
@@ -143,6 +143,8 @@ FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)
|
||||
dtb(params->dtb),
|
||||
tickEvent([this]{ tick(); }, "FullO3CPU tick",
|
||||
false, Event::CPU_Tick_Pri),
|
||||
threadExitEvent([this]{ exitThreads(); }, "FullO3CPU exit threads",
|
||||
false, Event::CPU_Exit_Pri),
|
||||
#ifndef NDEBUG
|
||||
instcount(0),
|
||||
#endif
|
||||
@@ -810,7 +812,7 @@ void
|
||||
FullO3CPU<Impl>::haltContext(ThreadID tid)
|
||||
{
|
||||
//For now, this is the same as deallocate
|
||||
DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating", tid);
|
||||
DPRINTF(O3CPU,"[tid:%i]: Halt Context called. Deallocating\n", tid);
|
||||
assert(!switchedOut());
|
||||
|
||||
deactivateThread(tid);
|
||||
@@ -886,51 +888,20 @@ FullO3CPU<Impl>::removeThread(ThreadID tid)
|
||||
// here to alleviate the case for double-freeing registers
|
||||
// in SMT workloads.
|
||||
|
||||
// Unbind Int Regs from Rename Map
|
||||
for (RegId reg_id(IntRegClass, 0); reg_id.index() < TheISA::NumIntRegs;
|
||||
reg_id.index()++) {
|
||||
PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
|
||||
scoreboard.unsetReg(phys_reg);
|
||||
freeList.addReg(phys_reg);
|
||||
}
|
||||
|
||||
// Unbind Float Regs from Rename Map
|
||||
for (RegId reg_id(FloatRegClass, 0); reg_id.index() < TheISA::NumFloatRegs;
|
||||
reg_id.index()++) {
|
||||
PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
|
||||
scoreboard.unsetReg(phys_reg);
|
||||
freeList.addReg(phys_reg);
|
||||
}
|
||||
|
||||
// Unbind Vector Predicate Regs from Rename Map
|
||||
for (unsigned preg = 0; preg < TheISA::NumVecPredRegs; preg++) {
|
||||
PhysRegIdPtr phys_reg = renameMap[tid].lookup(
|
||||
RegId(VecPredRegClass, preg));
|
||||
scoreboard.unsetReg(phys_reg);
|
||||
freeList.addReg(phys_reg);
|
||||
}
|
||||
|
||||
// Unbind condition-code Regs from Rename Map
|
||||
for (RegId reg_id(CCRegClass, 0); reg_id.index() < TheISA::NumCCRegs;
|
||||
reg_id.index()++) {
|
||||
PhysRegIdPtr phys_reg = renameMap[tid].lookup(reg_id);
|
||||
scoreboard.unsetReg(phys_reg);
|
||||
freeList.addReg(phys_reg);
|
||||
}
|
||||
|
||||
// Squash Throughout Pipeline
|
||||
DynInstPtr inst = commit.rob->readHeadInst(tid);
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
fetch.squash(0, squash_seq_num, inst, tid);
|
||||
decode.squash(tid);
|
||||
rename.squash(squash_seq_num, tid);
|
||||
iew.squash(tid);
|
||||
iew.ldstQueue.squash(squash_seq_num, tid);
|
||||
commit.rob->squash(squash_seq_num, tid);
|
||||
|
||||
// clear all thread-specific states in each stage of the pipeline
|
||||
// since this thread is going to be completely removed from the CPU
|
||||
commit.clearStates(tid);
|
||||
fetch.clearStates(tid);
|
||||
decode.clearStates(tid);
|
||||
rename.clearStates(tid);
|
||||
iew.clearStates(tid);
|
||||
|
||||
// at this step, all instructions in the pipeline should be already
|
||||
// either committed successfully or squashed. All thread-specific
|
||||
// queues in the pipeline must be empty.
|
||||
assert(iew.instQueue.getCount(tid) == 0);
|
||||
assert(iew.ldstQueue.getCount(tid) == 0);
|
||||
assert(commit.rob->isEmpty(tid));
|
||||
|
||||
// Reset ROB/IQ/LSQ Entries
|
||||
|
||||
@@ -1884,5 +1855,78 @@ FullO3CPU<Impl>::updateThreadPriority()
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::addThreadToExitingList(ThreadID tid)
|
||||
{
|
||||
DPRINTF(O3CPU, "Thread %d is inserted to exitingThreads list\n", tid);
|
||||
|
||||
// make sure the thread is Active
|
||||
assert(std::find(activeThreads.begin(), activeThreads.end(), tid)
|
||||
!= activeThreads.end());
|
||||
|
||||
// make sure the thread has not been added to the list yet
|
||||
assert(exitingThreads.count(tid) == 0);
|
||||
|
||||
// add the thread to exitingThreads list to mark that this thread is
|
||||
// trying to exit. The boolean value in the pair denotes if a thread is
|
||||
// ready to exit. The thread is not ready to exit until the corresponding
|
||||
// exit trap event is processed in the future. Until then, it'll be still
|
||||
// an active thread that is trying to exit.
|
||||
exitingThreads.emplace(std::make_pair(tid, false));
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
FullO3CPU<Impl>::isThreadExiting(ThreadID tid) const
|
||||
{
|
||||
return exitingThreads.count(tid) == 1;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::scheduleThreadExitEvent(ThreadID tid)
|
||||
{
|
||||
assert(exitingThreads.count(tid) == 1);
|
||||
|
||||
// exit trap event has been processed. Now, the thread is ready to exit
|
||||
// and be removed from the CPU.
|
||||
exitingThreads[tid] = true;
|
||||
|
||||
// we schedule a threadExitEvent in the next cycle to properly clean
|
||||
// up the thread's states in the pipeline. threadExitEvent has lower
|
||||
// priority than tickEvent, so the cleanup will happen at the very end
|
||||
// of the next cycle after all pipeline stages complete their operations.
|
||||
// We want all stages to complete squashing instructions before doing
|
||||
// the cleanup.
|
||||
if (!threadExitEvent.scheduled()) {
|
||||
schedule(threadExitEvent, nextCycle());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
FullO3CPU<Impl>::exitThreads()
|
||||
{
|
||||
// there must be at least one thread trying to exit
|
||||
assert(exitingThreads.size() > 0);
|
||||
|
||||
// terminate all threads that are ready to exit
|
||||
auto it = exitingThreads.begin();
|
||||
while (it != exitingThreads.end()) {
|
||||
ThreadID thread_id = it->first;
|
||||
bool readyToExit = it->second;
|
||||
|
||||
if (readyToExit) {
|
||||
DPRINTF(O3CPU, "Exiting thread %d\n", thread_id);
|
||||
haltContext(thread_id);
|
||||
tcBase(thread_id)->setStatus(ThreadContext::Halted);
|
||||
it = exitingThreads.erase(it);
|
||||
} else {
|
||||
it++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Forward declaration of FullO3CPU.
|
||||
template class FullO3CPU<O3CPUImpl>;
|
||||
|
||||
@@ -205,6 +205,9 @@ class FullO3CPU : public BaseO3CPU
|
||||
/** The tick event used for scheduling CPU ticks. */
|
||||
EventFunctionWrapper tickEvent;
|
||||
|
||||
/** The exit event used for terminating all ready-to-exit threads */
|
||||
EventFunctionWrapper threadExitEvent;
|
||||
|
||||
/** Schedule tick event, regardless of its current state. */
|
||||
void scheduleTickEvent(Cycles delay)
|
||||
{
|
||||
@@ -331,6 +334,21 @@ class FullO3CPU : public BaseO3CPU
|
||||
void serializeThread(CheckpointOut &cp, ThreadID tid) const override;
|
||||
void unserializeThread(CheckpointIn &cp, ThreadID tid) override;
|
||||
|
||||
/** Insert tid to the list of threads trying to exit */
|
||||
void addThreadToExitingList(ThreadID tid);
|
||||
|
||||
/** Is the thread trying to exit? */
|
||||
bool isThreadExiting(ThreadID tid) const;
|
||||
|
||||
/**
|
||||
* If a thread is trying to exit and its corresponding trap event
|
||||
* has been completed, schedule an event to terminate the thread.
|
||||
*/
|
||||
void scheduleThreadExitEvent(ThreadID tid);
|
||||
|
||||
/** Terminate all threads that are ready to exit */
|
||||
void exitThreads();
|
||||
|
||||
public:
|
||||
/** Executes a syscall.
|
||||
* @todo: Determine if this needs to be virtual.
|
||||
@@ -648,6 +666,13 @@ class FullO3CPU : public BaseO3CPU
|
||||
/** Active Threads List */
|
||||
std::list<ThreadID> activeThreads;
|
||||
|
||||
/**
|
||||
* This is a list of threads that are trying to exit. Each thread id
|
||||
* is mapped to a boolean value denoting whether the thread is ready
|
||||
* to exit.
|
||||
*/
|
||||
std::unordered_map<ThreadID, bool> exitingThreads;
|
||||
|
||||
/** Integer Register Scoreboard */
|
||||
Scoreboard scoreboard;
|
||||
|
||||
|
||||
@@ -102,6 +102,10 @@ class DefaultDecode
|
||||
DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
void startupStage();
|
||||
|
||||
/** Clear all thread-specific states */
|
||||
void clearStates(ThreadID tid);
|
||||
|
||||
void resetStage();
|
||||
|
||||
/** Returns the name of decode. */
|
||||
|
||||
@@ -91,6 +91,14 @@ DefaultDecode<Impl>::startupStage()
|
||||
resetStage();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
decodeStatus[tid] = Idle;
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::resetStage()
|
||||
|
||||
@@ -216,6 +216,9 @@ class DefaultFetch
|
||||
/** Initialize stage. */
|
||||
void startupStage();
|
||||
|
||||
/** Clear all thread-specific states */
|
||||
void clearStates(ThreadID tid);
|
||||
|
||||
/** Handles retrying the fetch access. */
|
||||
void recvReqRetry();
|
||||
|
||||
|
||||
@@ -326,6 +326,26 @@ DefaultFetch<Impl>::startupStage()
|
||||
switchToActive();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
fetchStatus[tid] = Running;
|
||||
pc[tid] = cpu->pcState(tid);
|
||||
fetchOffset[tid] = 0;
|
||||
macroop[tid] = NULL;
|
||||
delayedCommit[tid] = false;
|
||||
memReq[tid] = NULL;
|
||||
stalls[tid].decode = false;
|
||||
stalls[tid].drain = false;
|
||||
fetchBufferPC[tid] = 0;
|
||||
fetchBufferValid[tid] = false;
|
||||
fetchQueue[tid].clear();
|
||||
|
||||
// TODO not sure what to do with priorityList for now
|
||||
// priorityList.push_back(tid);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultFetch<Impl>::resetStage()
|
||||
|
||||
@@ -147,6 +147,9 @@ class DefaultIEW
|
||||
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
|
||||
void startupStage();
|
||||
|
||||
/** Clear all thread-specific states */
|
||||
void clearStates(ThreadID tid);
|
||||
|
||||
/** Sets main time buffer used for backwards communication. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
|
||||
@@ -323,6 +323,19 @@ DefaultIEW<Impl>::startupStage()
|
||||
cpu->activateStage(O3CPU::IEWIdx);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
toRename->iewInfo[tid].usedIQ = true;
|
||||
toRename->iewInfo[tid].freeIQEntries =
|
||||
instQueue.numFreeEntries(tid);
|
||||
|
||||
toRename->iewInfo[tid].usedLSQ = true;
|
||||
toRename->iewInfo[tid].freeLQEntries = ldstQueue.numFreeLoadEntries(tid);
|
||||
toRename->iewInfo[tid].freeSQEntries = ldstQueue.numFreeStoreEntries(tid);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultIEW<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
|
||||
|
||||
@@ -169,6 +169,9 @@ class DefaultRename
|
||||
/** Initializes variables for the stage. */
|
||||
void startupStage();
|
||||
|
||||
/** Clear all thread-specific states */
|
||||
void clearStates(ThreadID tid);
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
|
||||
@@ -254,6 +254,28 @@ DefaultRename<Impl>::startupStage()
|
||||
resetStage();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultRename<Impl>::clearStates(ThreadID tid)
|
||||
{
|
||||
renameStatus[tid] = Idle;
|
||||
|
||||
freeEntries[tid].iqEntries = iew_ptr->instQueue.numFreeEntries(tid);
|
||||
freeEntries[tid].lqEntries = iew_ptr->ldstQueue.numFreeLoadEntries(tid);
|
||||
freeEntries[tid].sqEntries = iew_ptr->ldstQueue.numFreeStoreEntries(tid);
|
||||
freeEntries[tid].robEntries = commit_ptr->numROBFreeEntries(tid);
|
||||
emptyROB[tid] = true;
|
||||
|
||||
stalls[tid].iew = false;
|
||||
serializeInst[tid] = NULL;
|
||||
|
||||
instsInProgress[tid] = 0;
|
||||
loadsInProgress[tid] = 0;
|
||||
storesInProgress[tid] = 0;
|
||||
|
||||
serializeOnNextInst[tid] = false;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultRename<Impl>::resetStage()
|
||||
|
||||
@@ -128,11 +128,18 @@ O3ThreadContext<Impl>::halt()
|
||||
{
|
||||
DPRINTF(O3CPU, "Calling halt on Thread Context %d\n", threadId());
|
||||
|
||||
if (thread->status() == ThreadContext::Halted)
|
||||
if (thread->status() == ThreadContext::Halting ||
|
||||
thread->status() == ThreadContext::Halted)
|
||||
return;
|
||||
|
||||
thread->setStatus(ThreadContext::Halted);
|
||||
cpu->haltContext(thread->threadId());
|
||||
// the thread is not going to halt/terminate immediately in this cycle.
|
||||
// The thread will be removed after an exit trap is processed
|
||||
// (e.g., after trapLatency cycles). Until then, the thread's status
|
||||
// will be Halting.
|
||||
thread->setStatus(ThreadContext::Halting);
|
||||
|
||||
// add this thread to the exiting list to mark that it is trying to exit.
|
||||
cpu->addThreadToExitingList(thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
|
||||
@@ -111,6 +111,10 @@ class ThreadContext
|
||||
/// synchronization, etc.
|
||||
Suspended,
|
||||
|
||||
/// Trying to exit and waiting for an event to completely exit.
|
||||
/// Entered when target executes an exit syscall.
|
||||
Halting,
|
||||
|
||||
/// Permanently shut down. Entered when target executes
|
||||
/// m5exit pseudo-instruction. When all contexts enter
|
||||
/// this state, the simulation will terminate.
|
||||
|
||||
@@ -161,6 +161,9 @@ class EventBase
|
||||
/// (such as writebacks).
|
||||
static const Priority CPU_Tick_Pri = 50;
|
||||
|
||||
/// If we want to exit a thread in a CPU, it comes after CPU_Tick_Pri
|
||||
static const Priority CPU_Exit_Pri = 64;
|
||||
|
||||
/// Statistics events (dump, reset, etc.) come after
|
||||
/// everything else, but before exit.
|
||||
static const Priority Stat_Event_Pri = 90;
|
||||
|
||||
Reference in New Issue
Block a user