cpu: Fix cache blocked load behavior in o3 cpu

This patch fixes the load blocked/replay mechanism in the o3 cpu.  Rather than
flushing the entire pipeline, this patch replays loads once the cache becomes
unblocked.

Additionally, deferred memory instructions (loads which had conflicting stores)
would not respect the number of functional units when replayed (they only
respected the issue width).  This patch also corrects that.

Improvements over 20% have been observed on a microbenchmark designed to
exercise this behavior.
This commit is contained in:
Mitch Hayenga
2014-09-03 07:42:39 -04:00
parent 283935a6f0
commit 4f13f676aa
10 changed files with 203 additions and 256 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2013 ARM Limited
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -413,6 +413,8 @@ InstructionQueue<Impl>::resetState()
nonSpecInsts.clear();
listOrder.clear();
deferredMemInsts.clear();
blockedMemInsts.clear();
retryMemInsts.clear();
}
template <class Impl>
@@ -734,13 +736,14 @@ InstructionQueue<Impl>::scheduleReadyInsts()
IssueStruct *i2e_info = issueToExecuteQueue->access(0);
DynInstPtr deferred_mem_inst;
int total_deferred_mem_issued = 0;
while (total_deferred_mem_issued < totalWidth &&
(deferred_mem_inst = getDeferredMemInstToExecute()) != 0) {
issueToExecuteQueue->access(0)->size++;
instsToExecute.push_back(deferred_mem_inst);
total_deferred_mem_issued++;
DynInstPtr mem_inst;
while (mem_inst = getDeferredMemInstToExecute()) {
addReadyMemInst(mem_inst);
}
// See if any cache blocked instructions are able to be executed
while (mem_inst = getBlockedMemInstToExecute()) {
addReadyMemInst(mem_inst);
}
// Have iterator to head of the list
@@ -751,12 +754,11 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// Increment the iterator.
// This will avoid trying to schedule a certain op class if there are no
// FUs that handle it.
int total_issued = 0;
ListOrderIt order_it = listOrder.begin();
ListOrderIt order_end_it = listOrder.end();
int total_issued = 0;
while (total_issued < (totalWidth - total_deferred_mem_issued) &&
order_it != order_end_it) {
while (total_issued < totalWidth && order_it != order_end_it) {
OpClass op_class = (*order_it).queueType;
assert(!readyInsts[op_class].empty());
@@ -874,7 +876,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
// @todo If the way deferred memory instructions are handled due to
// translation changes then the deferredMemInsts condition should be removed
// from the code below.
if (total_issued || total_deferred_mem_issued || deferredMemInsts.size()) {
if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
cpu->activityThisCycle();
} else {
DPRINTF(IQ, "Not able to schedule any instructions.\n");
@@ -1050,7 +1052,7 @@ template <class Impl>
void
InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
{
memDepUnit[replay_inst->threadNumber].replay(replay_inst);
memDepUnit[replay_inst->threadNumber].replay();
}
template <class Impl>
@@ -1077,6 +1079,27 @@ InstructionQueue<Impl>::deferMemInst(DynInstPtr &deferred_inst)
deferredMemInsts.push_back(deferred_inst);
}
/**
 * Park a memory instruction on the blocked list.
 *
 * Resets the instruction's translation-started and translation-completed
 * flags to false, clears its issued and can-issue state, and then appends
 * it to blockedMemInsts.
 *
 * NOTE(review): presumably called when the cache cannot accept the access,
 * so that the instruction is re-translated and re-issued once it is retried
 * -- confirm against the caller.
 *
 * @param blocked_inst The instruction to reset and queue as blocked.
 */
template <class Impl>
void
InstructionQueue<Impl>::blockMemInst(DynInstPtr &blocked_inst)
{
// Reset the translation flags on the instruction.
blocked_inst->translationStarted(false);
blocked_inst->translationCompleted(false);
// Clear the issue state on the instruction.
blocked_inst->clearIssued();
blocked_inst->clearCanIssue();
// Hold the instruction here until cacheUnblocked() moves it for retry.
blockedMemInsts.push_back(blocked_inst);
}
/**
 * Make every currently blocked memory instruction eligible for retry.
 *
 * Splices the contents of blockedMemInsts onto the end of retryMemInsts
 * and then wakes the CPU.
 */
template <class Impl>
void
InstructionQueue<Impl>::cacheUnblocked()
{
// Append the blocked instructions after any already awaiting retry.
// NOTE(review): assumes both containers are std::list, in which case
// blockedMemInsts is left empty and relative order is preserved -- confirm.
retryMemInsts.splice(retryMemInsts.end(), blockedMemInsts);
// Get the CPU ticking again
cpu->wakeCPU();
}
template <class Impl>
typename Impl::DynInstPtr
InstructionQueue<Impl>::getDeferredMemInstToExecute()
@@ -1084,12 +1107,25 @@ InstructionQueue<Impl>::getDeferredMemInstToExecute()
for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end();
++it) {
if ((*it)->translationCompleted() || (*it)->isSquashed()) {
DynInstPtr ret = *it;
DynInstPtr mem_inst = *it;
deferredMemInsts.erase(it);
return ret;
return mem_inst;
}
}
return NULL;
return nullptr;
}
/**
 * Remove and return the instruction at the front of retryMemInsts.
 *
 * @return The front instruction of retryMemInsts, which is popped from the
 *         list, or nullptr when the list is empty.
 */
template <class Impl>
typename Impl::DynInstPtr
InstructionQueue<Impl>::getBlockedMemInstToExecute()
{
    // Nothing is waiting to be retried.
    if (retryMemInsts.empty())
        return nullptr;

    // Take a copy of the front entry before removing it from the list.
    DynInstPtr next_retry = retryMemInsts.front();
    retryMemInsts.pop_front();
    return next_retry;
}
template <class Impl>