cpu-o3: MemDepUnit tracks load-acquire/store-release
MemDepUnit tracks loads/stores that are also barriers, which is the case of load-acquire / store-release instructions. The tracking logic is also extended to consider multiple outstanding barriers. Change-Id: I95b0c710d7c7e4a138492177e3eaaf5143e9a0ba Signed-off-by: Tiago Mück <tiago.muck@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/27132 Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012, 2014 ARM Limited
|
||||
* Copyright (c) 2012, 2014, 2020 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -45,6 +45,7 @@
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
@@ -177,7 +178,7 @@ class MemDepUnit
|
||||
public:
|
||||
/** Constructs a memory dependence entry. */
|
||||
MemDepEntry(const DynInstPtr &new_inst)
|
||||
: inst(new_inst), regsReady(false), memDepReady(false),
|
||||
: inst(new_inst), regsReady(false), memDeps(0),
|
||||
completed(false), squashed(false)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
@@ -216,8 +217,8 @@ class MemDepUnit
|
||||
|
||||
/** If the registers are ready or not. */
|
||||
bool regsReady;
|
||||
/** If all memory dependencies have been satisfied. */
|
||||
bool memDepReady;
|
||||
/** Number of memory dependencies that need to be satisfied. */
|
||||
int memDeps;
|
||||
/** If the instruction is completed. */
|
||||
bool completed;
|
||||
/** If the instruction is squashed. */
|
||||
@@ -257,14 +258,20 @@ class MemDepUnit
|
||||
*/
|
||||
MemDepPred depPred;
|
||||
|
||||
/** Sequence numbers of outstanding load barriers. */
|
||||
std::unordered_set<InstSeqNum> loadBarrierSNs;
|
||||
|
||||
/** Sequence numbers of outstanding store barriers. */
|
||||
std::unordered_set<InstSeqNum> storeBarrierSNs;
|
||||
|
||||
/** Is there an outstanding load barrier that loads must wait on. */
|
||||
bool loadBarrier;
|
||||
/** The sequence number of the load barrier. */
|
||||
InstSeqNum loadBarrierSN;
|
||||
bool hasLoadBarrier() const { return !loadBarrierSNs.empty(); }
|
||||
|
||||
/** Is there an outstanding store barrier that loads must wait on. */
|
||||
bool storeBarrier;
|
||||
/** The sequence number of the store barrier. */
|
||||
InstSeqNum storeBarrierSN;
|
||||
bool hasStoreBarrier() const { return !storeBarrierSNs.empty(); }
|
||||
|
||||
/** Inserts the SN of a barrier inst. to the list of tracked barriers */
|
||||
void insertBarrierSN(const DynInstPtr &barr_inst);
|
||||
|
||||
/** Pointer to the IQ. */
|
||||
InstructionQueue<Impl> *iqPtr;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012, 2014 ARM Limited
|
||||
* Copyright (c) 2012, 2014, 2020 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -42,6 +42,7 @@
|
||||
#define __CPU_O3_MEM_DEP_UNIT_IMPL_HH__
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/mem_dep_unit.hh"
|
||||
@@ -50,8 +51,7 @@
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
MemDepUnit<MemDepPred, Impl>::MemDepUnit()
|
||||
: loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
|
||||
storeBarrierSN(0), iqPtr(NULL)
|
||||
: iqPtr(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -60,8 +60,7 @@ MemDepUnit<MemDepPred, Impl>::MemDepUnit(DerivO3CPUParams *params)
|
||||
: _name(params->name + ".memdepunit"),
|
||||
depPred(params->store_set_clear_period, params->SSITSize,
|
||||
params->LFSTSize),
|
||||
loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
|
||||
storeBarrierSN(0), iqPtr(NULL)
|
||||
iqPtr(NULL)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
|
||||
}
|
||||
@@ -155,8 +154,8 @@ void
|
||||
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
|
||||
{
|
||||
// Be sure to reset all state.
|
||||
loadBarrier = storeBarrier = false;
|
||||
loadBarrierSN = storeBarrierSN = 0;
|
||||
loadBarrierSNs.clear();
|
||||
storeBarrierSNs.clear();
|
||||
depPred.clear();
|
||||
}
|
||||
|
||||
@@ -167,6 +166,29 @@ MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
|
||||
iqPtr = iq_ptr;
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrierSN(const DynInstPtr &barr_inst)
|
||||
{
|
||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||
// Memory barriers block loads and stores, write barriers only stores.
|
||||
if (barr_inst->isMemBarrier()) {
|
||||
loadBarrierSNs.insert(barr_sn);
|
||||
storeBarrierSNs.insert(barr_sn);
|
||||
DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
|
||||
barr_inst->pcState(), barr_sn);
|
||||
} else if (barr_inst->isWriteBarrier()) {
|
||||
storeBarrierSNs.insert(barr_sn);
|
||||
DPRINTF(MemDepUnit, "Inserted a write barrier %s SN:%lli\n",
|
||||
barr_inst->pcState(), barr_sn);
|
||||
}
|
||||
if (loadBarrierSNs.size() || storeBarrierSNs.size()) {
|
||||
DPRINTF(MemDepUnit, "Outstanding load barriers = %d; "
|
||||
"store barriers = %d\n",
|
||||
loadBarrierSNs.size(), storeBarrierSNs.size());
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
@@ -188,39 +210,46 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
|
||||
// Check any barriers and the dependence predictor for any
|
||||
// producing memrefs/stores.
|
||||
InstSeqNum producing_store;
|
||||
if ((inst->isLoad() || inst->isAtomic()) && loadBarrier) {
|
||||
DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
|
||||
loadBarrierSN);
|
||||
producing_store = loadBarrierSN;
|
||||
} else if ((inst->isStore() || inst->isAtomic()) && storeBarrier) {
|
||||
DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
|
||||
storeBarrierSN);
|
||||
producing_store = storeBarrierSN;
|
||||
std::vector<InstSeqNum> producing_stores;
|
||||
if ((inst->isLoad() || inst->isAtomic()) && hasLoadBarrier()) {
|
||||
DPRINTF(MemDepUnit, "%d load barriers in flight\n",
|
||||
loadBarrierSNs.size());
|
||||
producing_stores.insert(std::end(producing_stores),
|
||||
std::begin(loadBarrierSNs),
|
||||
std::end(loadBarrierSNs));
|
||||
} else if ((inst->isStore() || inst->isAtomic()) && hasStoreBarrier()) {
|
||||
DPRINTF(MemDepUnit, "%d store barriers in flight\n",
|
||||
storeBarrierSNs.size());
|
||||
producing_stores.insert(std::end(producing_stores),
|
||||
std::begin(storeBarrierSNs),
|
||||
std::end(storeBarrierSNs));
|
||||
} else {
|
||||
producing_store = depPred.checkInst(inst->instAddr());
|
||||
InstSeqNum dep = depPred.checkInst(inst->instAddr());
|
||||
if (dep != 0)
|
||||
producing_stores.push_back(dep);
|
||||
}
|
||||
|
||||
MemDepEntryPtr store_entry = NULL;
|
||||
std::vector<MemDepEntryPtr> store_entries;
|
||||
|
||||
// If there is a producing store, try to find the entry.
|
||||
if (producing_store != 0) {
|
||||
DPRINTF(MemDepUnit, "Searching for producer\n");
|
||||
for (auto producing_store : producing_stores) {
|
||||
DPRINTF(MemDepUnit, "Searching for producer [sn:%lli]\n",
|
||||
producing_store);
|
||||
MemDepHashIt hash_it = memDepHash.find(producing_store);
|
||||
|
||||
if (hash_it != memDepHash.end()) {
|
||||
store_entry = (*hash_it).second;
|
||||
DPRINTF(MemDepUnit, "Proucer found\n");
|
||||
store_entries.push_back((*hash_it).second);
|
||||
DPRINTF(MemDepUnit, "Producer found\n");
|
||||
}
|
||||
}
|
||||
|
||||
// If no store entry, then instruction can issue as soon as the registers
|
||||
// are ready.
|
||||
if (!store_entry) {
|
||||
if (store_entries.empty()) {
|
||||
DPRINTF(MemDepUnit, "No dependency for inst PC "
|
||||
"%s [sn:%lli].\n", inst->pcState(), inst->seqNum);
|
||||
|
||||
inst_entry->memDepReady = true;
|
||||
assert(inst_entry->memDeps == 0);
|
||||
|
||||
if (inst->readyToIssue()) {
|
||||
inst_entry->regsReady = true;
|
||||
@@ -229,8 +258,9 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
}
|
||||
} else {
|
||||
// Otherwise make the instruction dependent on the store/barrier.
|
||||
DPRINTF(MemDepUnit, "Adding to dependency list; "
|
||||
"inst PC %s is dependent on [sn:%lli].\n",
|
||||
DPRINTF(MemDepUnit, "Adding to dependency list\n");
|
||||
for (auto producing_store : producing_stores)
|
||||
DPRINTF(MemDepUnit, "\tinst PC %s is dependent on [sn:%lli].\n",
|
||||
inst->pcState(), producing_store);
|
||||
|
||||
if (inst->readyToIssue()) {
|
||||
@@ -241,7 +271,10 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
inst->clearCanIssue();
|
||||
|
||||
// Add this instruction to the list of dependents.
|
||||
store_entry->dependInsts.push_back(inst_entry);
|
||||
for (auto store_entry : store_entries)
|
||||
store_entry->dependInsts.push_back(inst_entry);
|
||||
|
||||
inst_entry->memDeps = store_entries.size();
|
||||
|
||||
if (inst->isLoad()) {
|
||||
++conflictingLoads;
|
||||
@@ -250,6 +283,9 @@ MemDepUnit<MemDepPred, Impl>::insert(const DynInstPtr &inst)
|
||||
}
|
||||
}
|
||||
|
||||
// for load-acquire store-release that could also be a barrier
|
||||
insertBarrierSN(inst);
|
||||
|
||||
if (inst->isStore() || inst->isAtomic()) {
|
||||
DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
@@ -268,21 +304,7 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertNonSpec(const DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
MemDepEntryPtr inst_entry = std::make_shared<MemDepEntry>(inst);
|
||||
|
||||
// Insert the MemDepEntry into the hash.
|
||||
memDepHash.insert(
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_insert++;
|
||||
#endif
|
||||
|
||||
// Add the instruction to the list.
|
||||
instList[tid].push_back(inst);
|
||||
|
||||
inst_entry->listIt = --(instList[tid].end());
|
||||
insertBarrier(inst);
|
||||
|
||||
// Might want to turn this part into an inline function or something.
|
||||
// It's shared between both insert functions.
|
||||
@@ -304,28 +326,13 @@ template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
{
|
||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||
// Memory barriers block loads and stores, write barriers only stores.
|
||||
if (barr_inst->isMemBarrier()) {
|
||||
loadBarrier = true;
|
||||
loadBarrierSN = barr_sn;
|
||||
storeBarrier = true;
|
||||
storeBarrierSN = barr_sn;
|
||||
DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
|
||||
barr_inst->pcState(),barr_sn);
|
||||
} else if (barr_inst->isWriteBarrier()) {
|
||||
storeBarrier = true;
|
||||
storeBarrierSN = barr_sn;
|
||||
DPRINTF(MemDepUnit, "Inserted a write barrier\n");
|
||||
}
|
||||
|
||||
ThreadID tid = barr_inst->threadNumber;
|
||||
|
||||
MemDepEntryPtr inst_entry = std::make_shared<MemDepEntry>(barr_inst);
|
||||
|
||||
// Add the MemDepEntry to the hash.
|
||||
memDepHash.insert(
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(barr_inst->seqNum, inst_entry));
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_insert++;
|
||||
#endif
|
||||
@@ -334,6 +341,8 @@ MemDepUnit<MemDepPred, Impl>::insertBarrier(const DynInstPtr &barr_inst)
|
||||
instList[tid].push_back(barr_inst);
|
||||
|
||||
inst_entry->listIt = --(instList[tid].end());
|
||||
|
||||
insertBarrierSN(barr_inst);
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
@@ -348,7 +357,7 @@ MemDepUnit<MemDepPred, Impl>::regsReady(const DynInstPtr &inst)
|
||||
|
||||
inst_entry->regsReady = true;
|
||||
|
||||
if (inst_entry->memDepReady) {
|
||||
if (inst_entry->memDeps == 0) {
|
||||
DPRINTF(MemDepUnit, "Instruction has its memory "
|
||||
"dependencies resolved, adding it to the ready list.\n");
|
||||
|
||||
@@ -430,18 +439,19 @@ MemDepUnit<MemDepPred, Impl>::completeBarrier(const DynInstPtr &inst)
|
||||
{
|
||||
wakeDependents(inst);
|
||||
completed(inst);
|
||||
|
||||
InstSeqNum barr_sn = inst->seqNum;
|
||||
DPRINTF(MemDepUnit, "barrier completed: %s SN:%lli\n", inst->pcState(),
|
||||
inst->seqNum);
|
||||
if (inst->isMemBarrier()) {
|
||||
if (loadBarrierSN == barr_sn)
|
||||
loadBarrier = false;
|
||||
if (storeBarrierSN == barr_sn)
|
||||
storeBarrier = false;
|
||||
assert(hasLoadBarrier());
|
||||
assert(hasStoreBarrier());
|
||||
loadBarrierSNs.erase(barr_sn);
|
||||
storeBarrierSNs.erase(barr_sn);
|
||||
DPRINTF(MemDepUnit, "Memory barrier completed: %s SN:%lli\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
} else if (inst->isWriteBarrier()) {
|
||||
if (storeBarrierSN == barr_sn)
|
||||
storeBarrier = false;
|
||||
assert(hasStoreBarrier());
|
||||
storeBarrierSNs.erase(barr_sn);
|
||||
DPRINTF(MemDepUnit, "Write barrier completed: %s SN:%lli\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -469,10 +479,13 @@ MemDepUnit<MemDepPred, Impl>::wakeDependents(const DynInstPtr &inst)
|
||||
"[sn:%lli].\n",
|
||||
woken_inst->inst->seqNum);
|
||||
|
||||
if (woken_inst->regsReady && !woken_inst->squashed) {
|
||||
assert(woken_inst->memDeps > 0);
|
||||
woken_inst->memDeps -= 1;
|
||||
|
||||
if ((woken_inst->memDeps == 0) &&
|
||||
woken_inst->regsReady &&
|
||||
!woken_inst->squashed) {
|
||||
moveToReady(woken_inst);
|
||||
} else {
|
||||
woken_inst->memDepReady = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,11 +520,9 @@ MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
|
||||
DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
|
||||
(*squash_it)->seqNum);
|
||||
|
||||
if ((*squash_it)->seqNum == loadBarrierSN)
|
||||
loadBarrier = false;
|
||||
loadBarrierSNs.erase((*squash_it)->seqNum);
|
||||
|
||||
if ((*squash_it)->seqNum == storeBarrierSN)
|
||||
storeBarrier = false;
|
||||
storeBarrierSNs.erase((*squash_it)->seqNum);
|
||||
|
||||
hash_it = memDepHash.find((*squash_it)->seqNum);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user