From 2801cc08c066a02a03f6a81349ced260cdebd8b0 Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Wed, 3 Mar 2021 01:50:18 -0800 Subject: [PATCH] cpu: De-templatize the O3 MemDepUnit. Change-Id: I4c1d6b1246fe4ca9f8a9cc9d434ca20f512f8d2f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42105 Reviewed-by: Gabe Black Maintainer: Gabe Black Tested-by: kokoro --- src/cpu/o3/inst_queue.hh | 2 +- src/cpu/o3/mem_dep_unit.cc | 613 +++++++++++++++++++++++++++++- src/cpu/o3/mem_dep_unit.hh | 46 +-- src/cpu/o3/mem_dep_unit_impl.hh | 648 -------------------------------- 4 files changed, 617 insertions(+), 692 deletions(-) delete mode 100644 src/cpu/o3/mem_dep_unit_impl.hh diff --git a/src/cpu/o3/inst_queue.hh b/src/cpu/o3/inst_queue.hh index e261d8fd2c..87c7981224 100644 --- a/src/cpu/o3/inst_queue.hh +++ b/src/cpu/o3/inst_queue.hh @@ -292,7 +292,7 @@ class InstructionQueue /** The memory dependence unit, which tracks/predicts memory dependences * between instructions. */ - MemDepUnit memDepUnit[O3MaxThreads]; + MemDepUnit memDepUnit[O3MaxThreads]; /** The queue to the execute stage. Issued instructions will be written * into it. diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc index 963d614b31..5f46cd7c8b 100644 --- a/src/cpu/o3/mem_dep_unit.cc +++ b/src/cpu/o3/mem_dep_unit.cc @@ -26,17 +26,610 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include "cpu/o3/isa_specific.hh" -#include "cpu/o3/mem_dep_unit_impl.hh" +#include "cpu/o3/mem_dep_unit.hh" + +#include +#include +#include + +#include "base/compiler.hh" +#include "base/debug.hh" +#include "cpu/o3/dyn_inst.hh" +#include "cpu/o3/inst_queue.hh" +#include "cpu/o3/limits.hh" +#include "debug/MemDepUnit.hh" +#include "params/DerivO3CPU.hh" #ifdef DEBUG -template <> -int MemDepUnit::MemDepEntry::memdep_count = 0; -template <> -int MemDepUnit::MemDepEntry::memdep_insert = 0; -template <> -int MemDepUnit::MemDepEntry::memdep_erase = 0; +int MemDepUnit::MemDepEntry::memdep_count = 0; +int MemDepUnit::MemDepEntry::memdep_insert = 0; +int MemDepUnit::MemDepEntry::memdep_erase = 0; #endif -// Force instantation of memory dependency unit using O3CPUImpl. -template class MemDepUnit; +MemDepUnit::MemDepUnit() : iqPtr(NULL), stats(nullptr) {} + +MemDepUnit::MemDepUnit(const DerivO3CPUParams ¶ms) + : _name(params.name + ".memdepunit"), + depPred(params.store_set_clear_period, params.SSITSize, + params.LFSTSize), + iqPtr(NULL), + stats(nullptr) +{ + DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); +} + +MemDepUnit::~MemDepUnit() +{ + for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { + + ListIt inst_list_it = instList[tid].begin(); + + MemDepHashIt hash_it; + + while (!instList[tid].empty()) { + hash_it = memDepHash.find((*inst_list_it)->seqNum); + + assert(hash_it != memDepHash.end()); + + memDepHash.erase(hash_it); + + instList[tid].erase(inst_list_it++); + } + } + +#ifdef DEBUG + assert(MemDepEntry::memdep_count == 0); +#endif +} + +void +MemDepUnit::init(const DerivO3CPUParams ¶ms, ThreadID tid, + FullO3CPU *cpu) +{ + DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid); + + _name = csprintf("%s.memDep%d", params.name, tid); + id = tid; + + depPred.init(params.store_set_clear_period, params.SSITSize, + params.LFSTSize); + + std::string stats_group_name = csprintf("MemDepUnit__%i", tid); + cpu->addStatGroup(stats_group_name.c_str(), &stats); +} + +MemDepUnit::MemDepUnitStats::MemDepUnitStats(Stats::Group *parent) + : Stats::Group(parent), + ADD_STAT(insertedLoads, Stats::Units::Count::get(), + "Number of loads inserted to the mem dependence unit."), + ADD_STAT(insertedStores, Stats::Units::Count::get(), + "Number of stores inserted to the mem dependence unit."), + ADD_STAT(conflictingLoads, Stats::Units::Count::get(), + "Number of conflicting loads."), + ADD_STAT(conflictingStores, Stats::Units::Count::get(), + "Number of conflicting stores.") +{ +} + +bool +MemDepUnit::isDrained() const +{ + bool drained = instsToReplay.empty() + && memDepHash.empty() + && instsToReplay.empty(); + for (int i = 0; i < O3MaxThreads; ++i) + drained = drained && instList[i].empty(); + + return drained; +} + +void +MemDepUnit::drainSanityCheck() const +{ + assert(instsToReplay.empty()); + assert(memDepHash.empty()); + for (int i = 0; i < O3MaxThreads; ++i) + assert(instList[i].empty()); + assert(instsToReplay.empty()); + assert(memDepHash.empty()); +} + +void +MemDepUnit::takeOverFrom() +{ + // Be sure to reset all state. + loadBarrierSNs.clear(); + storeBarrierSNs.clear(); + depPred.clear(); +} + +void +MemDepUnit::setIQ(InstructionQueue *iq_ptr) +{ + iqPtr = iq_ptr; +} + +void +MemDepUnit::insertBarrierSN(const O3DynInstPtr &barr_inst) +{ + InstSeqNum barr_sn = barr_inst->seqNum; + + if (barr_inst->isReadBarrier() || barr_inst->isHtmCmd()) + loadBarrierSNs.insert(barr_sn); + if (barr_inst->isWriteBarrier() || barr_inst->isHtmCmd()) + storeBarrierSNs.insert(barr_sn); + + if (Debug::MemDepUnit) { + const char *barrier_type = nullptr; + if (barr_inst->isReadBarrier() && barr_inst->isWriteBarrier()) + barrier_type = "memory"; + else if (barr_inst->isReadBarrier()) + barrier_type = "read"; + else if (barr_inst->isWriteBarrier()) + barrier_type = "write"; + + if (barrier_type) { + DPRINTF(MemDepUnit, "Inserted a %s barrier %s SN:%lli\n", + barrier_type, barr_inst->pcState(), barr_sn); + } + + if (loadBarrierSNs.size() || storeBarrierSNs.size()) { + DPRINTF(MemDepUnit, "Outstanding load barriers = %d; " + "store barriers = %d\n", + loadBarrierSNs.size(), storeBarrierSNs.size()); + } + } +} + +void +MemDepUnit::insert(const O3DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + MemDepEntryPtr inst_entry = std::make_shared(inst); + + // Add the MemDepEntry to the hash. + memDepHash.insert( + std::pair(inst->seqNum, inst_entry)); +#ifdef DEBUG + MemDepEntry::memdep_insert++; +#endif + + instList[tid].push_back(inst); + + inst_entry->listIt = --(instList[tid].end()); + + // Check any barriers and the dependence predictor for any + // producing memrefs/stores. + std::vector producing_stores; + if ((inst->isLoad() || inst->isAtomic()) && hasLoadBarrier()) { + DPRINTF(MemDepUnit, "%d load barriers in flight\n", + loadBarrierSNs.size()); + producing_stores.insert(std::end(producing_stores), + std::begin(loadBarrierSNs), + std::end(loadBarrierSNs)); + } else if ((inst->isStore() || inst->isAtomic()) && hasStoreBarrier()) { + DPRINTF(MemDepUnit, "%d store barriers in flight\n", + storeBarrierSNs.size()); + producing_stores.insert(std::end(producing_stores), + std::begin(storeBarrierSNs), + std::end(storeBarrierSNs)); + } else { + InstSeqNum dep = depPred.checkInst(inst->instAddr()); + if (dep != 0) + producing_stores.push_back(dep); + } + + std::vector store_entries; + + // If there is a producing store, try to find the entry. + for (auto producing_store : producing_stores) { + DPRINTF(MemDepUnit, "Searching for producer [sn:%lli]\n", + producing_store); + MemDepHashIt hash_it = memDepHash.find(producing_store); + + if (hash_it != memDepHash.end()) { + store_entries.push_back((*hash_it).second); + DPRINTF(MemDepUnit, "Producer found\n"); + } + } + + // If no store entry, then instruction can issue as soon as the registers + // are ready. + if (store_entries.empty()) { + DPRINTF(MemDepUnit, "No dependency for inst PC " + "%s [sn:%lli].\n", inst->pcState(), inst->seqNum); + + assert(inst_entry->memDeps == 0); + + if (inst->readyToIssue()) { + inst_entry->regsReady = true; + + moveToReady(inst_entry); + } + } else { + // Otherwise make the instruction dependent on the store/barrier. + DPRINTF(MemDepUnit, "Adding to dependency list\n"); + for (GEM5_VAR_USED auto producing_store : producing_stores) + DPRINTF(MemDepUnit, "\tinst PC %s is dependent on [sn:%lli].\n", + inst->pcState(), producing_store); + + if (inst->readyToIssue()) { + inst_entry->regsReady = true; + } + + // Clear the bit saying this instruction can issue. + inst->clearCanIssue(); + + // Add this instruction to the list of dependents. + for (auto store_entry : store_entries) + store_entry->dependInsts.push_back(inst_entry); + + inst_entry->memDeps = store_entries.size(); + + if (inst->isLoad()) { + ++stats.conflictingLoads; + } else { + ++stats.conflictingStores; + } + } + + // for load-acquire store-release that could also be a barrier + insertBarrierSN(inst); + + if (inst->isStore() || inst->isAtomic()) { + DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n", + inst->pcState(), inst->seqNum); + + depPred.insertStore(inst->instAddr(), inst->seqNum, + inst->threadNumber); + + ++stats.insertedStores; + } else if (inst->isLoad()) { + ++stats.insertedLoads; + } else { + panic("Unknown type! (most likely a barrier)."); + } +} + +void +MemDepUnit::insertNonSpec(const O3DynInstPtr &inst) +{ + insertBarrier(inst); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore() || inst->isAtomic()) { + DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n", + inst->pcState(), inst->seqNum); + + depPred.insertStore(inst->instAddr(), inst->seqNum, + inst->threadNumber); + + ++stats.insertedStores; + } else if (inst->isLoad()) { + ++stats.insertedLoads; + } else { + panic("Unknown type! (most likely a barrier)."); + } +} + +void +MemDepUnit::insertBarrier(const O3DynInstPtr &barr_inst) +{ + ThreadID tid = barr_inst->threadNumber; + + MemDepEntryPtr inst_entry = std::make_shared(barr_inst); + + // Add the MemDepEntry to the hash. + memDepHash.insert( + std::pair(barr_inst->seqNum, inst_entry)); +#ifdef DEBUG + MemDepEntry::memdep_insert++; +#endif + + // Add the instruction to the instruction list. + instList[tid].push_back(barr_inst); + + inst_entry->listIt = --(instList[tid].end()); + + insertBarrierSN(barr_inst); +} + +void +MemDepUnit::regsReady(const O3DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "Marking registers as ready for " + "instruction PC %s [sn:%lli].\n", + inst->pcState(), inst->seqNum); + + MemDepEntryPtr inst_entry = findInHash(inst); + + inst_entry->regsReady = true; + + if (inst_entry->memDeps == 0) { + DPRINTF(MemDepUnit, "Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + moveToReady(inst_entry); + } else { + DPRINTF(MemDepUnit, "Instruction still waiting on " + "memory dependency.\n"); + } +} + +void +MemDepUnit::nonSpecInstReady(const O3DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "Marking non speculative " + "instruction PC %s as ready [sn:%lli].\n", + inst->pcState(), inst->seqNum); + + MemDepEntryPtr inst_entry = findInHash(inst); + + moveToReady(inst_entry); +} + +void +MemDepUnit::reschedule(const O3DynInstPtr &inst) +{ + instsToReplay.push_back(inst); +} + +void +MemDepUnit::replay() +{ + O3DynInstPtr temp_inst; + + // For now this replay function replays all waiting memory ops. + while (!instsToReplay.empty()) { + temp_inst = instsToReplay.front(); + + MemDepEntryPtr inst_entry = findInHash(temp_inst); + + DPRINTF(MemDepUnit, "Replaying mem instruction PC %s [sn:%lli].\n", + temp_inst->pcState(), temp_inst->seqNum); + + moveToReady(inst_entry); + + instsToReplay.pop_front(); + } +} + +void +MemDepUnit::completed(const O3DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n", + inst->pcState(), inst->seqNum); + + ThreadID tid = inst->threadNumber; + + // Remove the instruction from the hash and the list. + MemDepHashIt hash_it = memDepHash.find(inst->seqNum); + + assert(hash_it != memDepHash.end()); + + instList[tid].erase((*hash_it).second->listIt); + + (*hash_it).second = NULL; + + memDepHash.erase(hash_it); +#ifdef DEBUG + MemDepEntry::memdep_erase++; +#endif +} + +void +MemDepUnit::completeInst(const O3DynInstPtr &inst) +{ + wakeDependents(inst); + completed(inst); + InstSeqNum barr_sn = inst->seqNum; + + if (inst->isWriteBarrier() || inst->isHtmCmd()) { + assert(hasStoreBarrier()); + storeBarrierSNs.erase(barr_sn); + } + if (inst->isReadBarrier() || inst->isHtmCmd()) { + assert(hasLoadBarrier()); + loadBarrierSNs.erase(barr_sn); + } + if (Debug::MemDepUnit) { + const char *barrier_type = nullptr; + if (inst->isWriteBarrier() && inst->isReadBarrier()) + barrier_type = "Memory"; + else if (inst->isWriteBarrier()) + barrier_type = "Write"; + else if (inst->isReadBarrier()) + barrier_type = "Read"; + + if (barrier_type) { + DPRINTF(MemDepUnit, "%s barrier completed: %s SN:%lli\n", + barrier_type, inst->pcState(), inst->seqNum); + } + } +} + +void +MemDepUnit::wakeDependents(const O3DynInstPtr &inst) +{ + // Only stores, atomics and barriers have dependents. + if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() && + !inst->isWriteBarrier() && !inst->isHtmCmd()) { + return; + } + + MemDepEntryPtr inst_entry = findInHash(inst); + + for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) { + MemDepEntryPtr woken_inst = inst_entry->dependInsts[i]; + + if (!woken_inst->inst) { + // Potentially removed mem dep entries could be on this list + continue; + } + + DPRINTF(MemDepUnit, "Waking up a dependent inst, " + "[sn:%lli].\n", + woken_inst->inst->seqNum); + + assert(woken_inst->memDeps > 0); + woken_inst->memDeps -= 1; + + if ((woken_inst->memDeps == 0) && + woken_inst->regsReady && + !woken_inst->squashed) { + moveToReady(woken_inst); + } + } + + inst_entry->dependInsts.clear(); +} + +MemDepUnit::MemDepEntry::MemDepEntry(const O3DynInstPtr &new_inst) : + inst(new_inst) +{ +#ifdef DEBUG + ++memdep_count; + + DPRINTF(MemDepUnit, + "Memory dependency entry created. memdep_count=%i %s\n", + memdep_count, inst->pcState()); +#endif +} + +MemDepUnit::MemDepEntry::~MemDepEntry() +{ + for (int i = 0; i < dependInsts.size(); ++i) { + dependInsts[i] = NULL; + } +#ifdef DEBUG + --memdep_count; + + DPRINTF(MemDepUnit, + "Memory dependency entry deleted. memdep_count=%i %s\n", + memdep_count, inst->pcState()); +#endif +} + +void +MemDepUnit::squash(const InstSeqNum &squashed_num, ThreadID tid) +{ + if (!instsToReplay.empty()) { + ListIt replay_it = instsToReplay.begin(); + while (replay_it != instsToReplay.end()) { + if ((*replay_it)->threadNumber == tid && + (*replay_it)->seqNum > squashed_num) { + instsToReplay.erase(replay_it++); + } else { + ++replay_it; + } + } + } + + ListIt squash_it = instList[tid].end(); + --squash_it; + + MemDepHashIt hash_it; + + while (!instList[tid].empty() && + (*squash_it)->seqNum > squashed_num) { + + DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n", + (*squash_it)->seqNum); + + loadBarrierSNs.erase((*squash_it)->seqNum); + + storeBarrierSNs.erase((*squash_it)->seqNum); + + hash_it = memDepHash.find((*squash_it)->seqNum); + + assert(hash_it != memDepHash.end()); + + (*hash_it).second->squashed = true; + + (*hash_it).second = NULL; + + memDepHash.erase(hash_it); +#ifdef DEBUG + MemDepEntry::memdep_erase++; +#endif + + instList[tid].erase(squash_it--); + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num, tid); +} + +void +MemDepUnit::violation(const O3DynInstPtr &store_inst, + const O3DynInstPtr &violating_load) +{ + DPRINTF(MemDepUnit, "Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->instAddr(), + store_inst->instAddr()); + // Tell the memory dependence unit of the violation. + depPred.violation(store_inst->instAddr(), violating_load->instAddr()); +} + +void +MemDepUnit::issue(const O3DynInstPtr &inst) +{ + DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n", + inst->instAddr(), inst->seqNum); + + depPred.issued(inst->instAddr(), inst->seqNum, inst->isStore()); +} + +MemDepUnit::MemDepEntryPtr & +MemDepUnit::findInHash(const O3DynInstConstPtr &inst) +{ + MemDepHashIt hash_it = memDepHash.find(inst->seqNum); + + assert(hash_it != memDepHash.end()); + + return (*hash_it).second; +} + +void +MemDepUnit::moveToReady(MemDepEntryPtr &woken_inst_entry) +{ + DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] " + "to the ready list.\n", woken_inst_entry->inst->seqNum); + + assert(!woken_inst_entry->squashed); + + iqPtr->addReadyMemInst(woken_inst_entry->inst); +} + + +void +MemDepUnit::dumpLists() +{ + for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { + cprintf("Instruction list %i size: %i\n", + tid, instList[tid].size()); + + ListIt inst_list_it = instList[tid].begin(); + int num = 0; + + while (inst_list_it != instList[tid].end()) { + cprintf("Instruction:%i\nPC: %s\n[sn:%llu]\n[tid:%i]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*inst_list_it)->pcState(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } + } + + cprintf("Memory dependence hash size: %i\n", memDepHash.size()); + +#ifdef DEBUG + cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +#endif +} diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh index 744d736ae1..21c5f71edf 100644 --- a/src/cpu/o3/mem_dep_unit.hh +++ b/src/cpu/o3/mem_dep_unit.hh @@ -50,16 +50,18 @@ #include "base/statistics.hh" #include "cpu/inst_seq.hh" #include "cpu/o3/dyn_inst_ptr.hh" +#include "cpu/o3/impl.hh" #include "cpu/o3/limits.hh" #include "cpu/o3/store_set.hh" #include "debug/MemDepUnit.hh" struct SNHash { - size_t operator() (const InstSeqNum &seq_num) const { + size_t + operator()(const InstSeqNum &seq_num) const + { unsigned a = (unsigned)seq_num; unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; - return hash; } }; @@ -83,7 +85,6 @@ class FullO3CPU; * utilize. Thus this class should be most likely be rewritten for other * dependence prediction schemes. */ -template class MemDepUnit { protected: @@ -104,7 +105,7 @@ class MemDepUnit /** Initializes the unit with parameters and a thread id. */ void init(const DerivO3CPUParams ¶ms, ThreadID tid, - FullO3CPU *cpu); + FullO3CPU *cpu); /** Determine if we are drained. */ bool isDrained() const; @@ -116,7 +117,7 @@ class MemDepUnit void takeOverFrom(); /** Sets the pointer to the IQ. */ - void setIQ(InstructionQueue *iq_ptr); + void setIQ(InstructionQueue *iq_ptr); /** Inserts a memory instruction. */ void insert(const O3DynInstPtr &inst); @@ -181,31 +182,10 @@ class MemDepUnit { public: /** Constructs a memory dependence entry. */ - MemDepEntry(const O3DynInstPtr &new_inst) - : inst(new_inst), regsReady(false), memDeps(0), - completed(false), squashed(false) - { -#ifdef DEBUG - ++memdep_count; - - DPRINTF(MemDepUnit, "Memory dependency entry created. " - "memdep_count=%i %s\n", memdep_count, inst->pcState()); -#endif - } + MemDepEntry(const O3DynInstPtr &new_inst); /** Frees any pointers. */ - ~MemDepEntry() - { - for (int i = 0; i < dependInsts.size(); ++i) { - dependInsts[i] = NULL; - } -#ifdef DEBUG - --memdep_count; - - DPRINTF(MemDepUnit, "Memory dependency entry deleted. " - "memdep_count=%i %s\n", memdep_count, inst->pcState()); -#endif - } + ~MemDepEntry(); /** Returns the name of the memory dependence entry. */ std::string name() const { return "memdepentry"; } @@ -220,13 +200,13 @@ class MemDepUnit std::vector dependInsts; /** If the registers are ready or not. */ - bool regsReady; + bool regsReady = false; /** Number of memory dependencies that need to be satisfied. */ - int memDeps; + int memDeps = 0; /** If the instruction is completed. */ - bool completed; + bool completed = false; /** If the instruction is squashed. */ - bool squashed; + bool squashed = false; /** For debugging. */ #ifdef DEBUG @@ -278,7 +258,7 @@ class MemDepUnit void insertBarrierSN(const O3DynInstPtr &barr_inst); /** Pointer to the IQ. */ - InstructionQueue *iqPtr; + InstructionQueue *iqPtr; /** The thread id of this memory dependence unit. */ int id; diff --git a/src/cpu/o3/mem_dep_unit_impl.hh b/src/cpu/o3/mem_dep_unit_impl.hh deleted file mode 100644 index f77fe4f2d0..0000000000 --- a/src/cpu/o3/mem_dep_unit_impl.hh +++ /dev/null @@ -1,648 +0,0 @@ -/* - * Copyright (c) 2012, 2014, 2020 ARM Limited - * All rights reserved - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder. You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2004-2006 The Regents of The University of Michigan - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __CPU_O3_MEM_DEP_UNIT_IMPL_HH__ -#define __CPU_O3_MEM_DEP_UNIT_IMPL_HH__ - -#include -#include -#include - -#include "base/compiler.hh" -#include "base/debug.hh" -#include "cpu/o3/inst_queue.hh" -#include "cpu/o3/limits.hh" -#include "cpu/o3/mem_dep_unit.hh" -#include "debug/MemDepUnit.hh" -#include "params/DerivO3CPU.hh" - -template -MemDepUnit::MemDepUnit() - : iqPtr(NULL), - stats(nullptr) -{ -} - -template -MemDepUnit::MemDepUnit(const DerivO3CPUParams ¶ms) - : _name(params.name + ".memdepunit"), - depPred(params.store_set_clear_period, params.SSITSize, - params.LFSTSize), - iqPtr(NULL), - stats(nullptr) -{ - DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n"); -} - -template -MemDepUnit::~MemDepUnit() -{ - for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { - - ListIt inst_list_it = instList[tid].begin(); - - MemDepHashIt hash_it; - - while (!instList[tid].empty()) { - hash_it = memDepHash.find((*inst_list_it)->seqNum); - - assert(hash_it != memDepHash.end()); - - memDepHash.erase(hash_it); - - instList[tid].erase(inst_list_it++); - } - } - -#ifdef DEBUG - assert(MemDepEntry::memdep_count == 0); -#endif -} - -template -void -MemDepUnit::init( - const DerivO3CPUParams ¶ms, ThreadID tid, FullO3CPU *cpu) -{ - DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid); - - _name = csprintf("%s.memDep%d", params.name, tid); - id = tid; - - depPred.init(params.store_set_clear_period, params.SSITSize, - params.LFSTSize); - - std::string stats_group_name = csprintf("MemDepUnit__%i", tid); - cpu->addStatGroup(stats_group_name.c_str(), &stats); -} - -template -MemDepUnit:: -MemDepUnitStats::MemDepUnitStats(Stats::Group *parent) - : Stats::Group(parent), - ADD_STAT(insertedLoads, Stats::Units::Count::get(), - "Number of loads inserted to the mem dependence unit."), - ADD_STAT(insertedStores, Stats::Units::Count::get(), - "Number of stores inserted to the mem dependence unit."), - ADD_STAT(conflictingLoads, Stats::Units::Count::get(), - "Number of conflicting loads."), - ADD_STAT(conflictingStores, Stats::Units::Count::get(), - "Number of conflicting stores.") -{ -} - -template -bool -MemDepUnit::isDrained() const -{ - bool drained = instsToReplay.empty() - && memDepHash.empty() - && instsToReplay.empty(); - for (int i = 0; i < O3MaxThreads; ++i) - drained = drained && instList[i].empty(); - - return drained; -} - -template -void -MemDepUnit::drainSanityCheck() const -{ - assert(instsToReplay.empty()); - assert(memDepHash.empty()); - for (int i = 0; i < O3MaxThreads; ++i) - assert(instList[i].empty()); - assert(instsToReplay.empty()); - assert(memDepHash.empty()); -} - -template -void -MemDepUnit::takeOverFrom() -{ - // Be sure to reset all state. - loadBarrierSNs.clear(); - storeBarrierSNs.clear(); - depPred.clear(); -} - -template -void -MemDepUnit::setIQ(InstructionQueue *iq_ptr) -{ - iqPtr = iq_ptr; -} - -template -void -MemDepUnit::insertBarrierSN(const O3DynInstPtr &barr_inst) -{ - InstSeqNum barr_sn = barr_inst->seqNum; - - if (barr_inst->isReadBarrier() || barr_inst->isHtmCmd()) - loadBarrierSNs.insert(barr_sn); - if (barr_inst->isWriteBarrier() || barr_inst->isHtmCmd()) - storeBarrierSNs.insert(barr_sn); - - if (Debug::MemDepUnit) { - const char *barrier_type = nullptr; - if (barr_inst->isReadBarrier() && barr_inst->isWriteBarrier()) - barrier_type = "memory"; - else if (barr_inst->isReadBarrier()) - barrier_type = "read"; - else if (barr_inst->isWriteBarrier()) - barrier_type = "write"; - - if (barrier_type) { - DPRINTF(MemDepUnit, "Inserted a %s barrier %s SN:%lli\n", - barrier_type, barr_inst->pcState(), barr_sn); - } - - if (loadBarrierSNs.size() || storeBarrierSNs.size()) { - DPRINTF(MemDepUnit, "Outstanding load barriers = %d; " - "store barriers = %d\n", - loadBarrierSNs.size(), storeBarrierSNs.size()); - } - } -} - -template -void -MemDepUnit::insert(const O3DynInstPtr &inst) -{ - ThreadID tid = inst->threadNumber; - - MemDepEntryPtr inst_entry = std::make_shared(inst); - - // Add the MemDepEntry to the hash. - memDepHash.insert( - std::pair(inst->seqNum, inst_entry)); -#ifdef DEBUG - MemDepEntry::memdep_insert++; -#endif - - instList[tid].push_back(inst); - - inst_entry->listIt = --(instList[tid].end()); - - // Check any barriers and the dependence predictor for any - // producing memrefs/stores. - std::vector producing_stores; - if ((inst->isLoad() || inst->isAtomic()) && hasLoadBarrier()) { - DPRINTF(MemDepUnit, "%d load barriers in flight\n", - loadBarrierSNs.size()); - producing_stores.insert(std::end(producing_stores), - std::begin(loadBarrierSNs), - std::end(loadBarrierSNs)); - } else if ((inst->isStore() || inst->isAtomic()) && hasStoreBarrier()) { - DPRINTF(MemDepUnit, "%d store barriers in flight\n", - storeBarrierSNs.size()); - producing_stores.insert(std::end(producing_stores), - std::begin(storeBarrierSNs), - std::end(storeBarrierSNs)); - } else { - InstSeqNum dep = depPred.checkInst(inst->instAddr()); - if (dep != 0) - producing_stores.push_back(dep); - } - - std::vector store_entries; - - // If there is a producing store, try to find the entry. - for (auto producing_store : producing_stores) { - DPRINTF(MemDepUnit, "Searching for producer [sn:%lli]\n", - producing_store); - MemDepHashIt hash_it = memDepHash.find(producing_store); - - if (hash_it != memDepHash.end()) { - store_entries.push_back((*hash_it).second); - DPRINTF(MemDepUnit, "Producer found\n"); - } - } - - // If no store entry, then instruction can issue as soon as the registers - // are ready. - if (store_entries.empty()) { - DPRINTF(MemDepUnit, "No dependency for inst PC " - "%s [sn:%lli].\n", inst->pcState(), inst->seqNum); - - assert(inst_entry->memDeps == 0); - - if (inst->readyToIssue()) { - inst_entry->regsReady = true; - - moveToReady(inst_entry); - } - } else { - // Otherwise make the instruction dependent on the store/barrier. - DPRINTF(MemDepUnit, "Adding to dependency list\n"); - for (GEM5_VAR_USED auto producing_store : producing_stores) - DPRINTF(MemDepUnit, "\tinst PC %s is dependent on [sn:%lli].\n", - inst->pcState(), producing_store); - - if (inst->readyToIssue()) { - inst_entry->regsReady = true; - } - - // Clear the bit saying this instruction can issue. - inst->clearCanIssue(); - - // Add this instruction to the list of dependents. - for (auto store_entry : store_entries) - store_entry->dependInsts.push_back(inst_entry); - - inst_entry->memDeps = store_entries.size(); - - if (inst->isLoad()) { - ++stats.conflictingLoads; - } else { - ++stats.conflictingStores; - } - } - - // for load-acquire store-release that could also be a barrier - insertBarrierSN(inst); - - if (inst->isStore() || inst->isAtomic()) { - DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n", - inst->pcState(), inst->seqNum); - - depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber); - - ++stats.insertedStores; - } else if (inst->isLoad()) { - ++stats.insertedLoads; - } else { - panic("Unknown type! (most likely a barrier)."); - } -} - -template -void -MemDepUnit::insertNonSpec(const O3DynInstPtr &inst) -{ - insertBarrier(inst); - - // Might want to turn this part into an inline function or something. - // It's shared between both insert functions. - if (inst->isStore() || inst->isAtomic()) { - DPRINTF(MemDepUnit, "Inserting store/atomic PC %s [sn:%lli].\n", - inst->pcState(), inst->seqNum); - - depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber); - - ++stats.insertedStores; - } else if (inst->isLoad()) { - ++stats.insertedLoads; - } else { - panic("Unknown type! (most likely a barrier)."); - } -} - -template -void -MemDepUnit::insertBarrier(const O3DynInstPtr &barr_inst) -{ - ThreadID tid = barr_inst->threadNumber; - - MemDepEntryPtr inst_entry = std::make_shared(barr_inst); - - // Add the MemDepEntry to the hash. - memDepHash.insert( - std::pair(barr_inst->seqNum, inst_entry)); -#ifdef DEBUG - MemDepEntry::memdep_insert++; -#endif - - // Add the instruction to the instruction list. - instList[tid].push_back(barr_inst); - - inst_entry->listIt = --(instList[tid].end()); - - insertBarrierSN(barr_inst); -} - -template -void -MemDepUnit::regsReady(const O3DynInstPtr &inst) -{ - DPRINTF(MemDepUnit, "Marking registers as ready for " - "instruction PC %s [sn:%lli].\n", - inst->pcState(), inst->seqNum); - - MemDepEntryPtr inst_entry = findInHash(inst); - - inst_entry->regsReady = true; - - if (inst_entry->memDeps == 0) { - DPRINTF(MemDepUnit, "Instruction has its memory " - "dependencies resolved, adding it to the ready list.\n"); - - moveToReady(inst_entry); - } else { - DPRINTF(MemDepUnit, "Instruction still waiting on " - "memory dependency.\n"); - } -} - -template -void -MemDepUnit::nonSpecInstReady(const O3DynInstPtr &inst) -{ - DPRINTF(MemDepUnit, "Marking non speculative " - "instruction PC %s as ready [sn:%lli].\n", - inst->pcState(), inst->seqNum); - - MemDepEntryPtr inst_entry = findInHash(inst); - - moveToReady(inst_entry); -} - -template -void -MemDepUnit::reschedule(const O3DynInstPtr &inst) -{ - instsToReplay.push_back(inst); -} - -template -void -MemDepUnit::replay() -{ - O3DynInstPtr temp_inst; - - // For now this replay function replays all waiting memory ops. - while (!instsToReplay.empty()) { - temp_inst = instsToReplay.front(); - - MemDepEntryPtr inst_entry = findInHash(temp_inst); - - DPRINTF(MemDepUnit, "Replaying mem instruction PC %s [sn:%lli].\n", - temp_inst->pcState(), temp_inst->seqNum); - - moveToReady(inst_entry); - - instsToReplay.pop_front(); - } -} - -template -void -MemDepUnit::completed(const O3DynInstPtr &inst) -{ - DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n", - inst->pcState(), inst->seqNum); - - ThreadID tid = inst->threadNumber; - - // Remove the instruction from the hash and the list. - MemDepHashIt hash_it = memDepHash.find(inst->seqNum); - - assert(hash_it != memDepHash.end()); - - instList[tid].erase((*hash_it).second->listIt); - - (*hash_it).second = NULL; - - memDepHash.erase(hash_it); -#ifdef DEBUG - MemDepEntry::memdep_erase++; -#endif -} - -template -void -MemDepUnit::completeInst(const O3DynInstPtr &inst) -{ - wakeDependents(inst); - completed(inst); - InstSeqNum barr_sn = inst->seqNum; - - if (inst->isWriteBarrier() || inst->isHtmCmd()) { - assert(hasStoreBarrier()); - storeBarrierSNs.erase(barr_sn); - } - if (inst->isReadBarrier() || inst->isHtmCmd()) { - assert(hasLoadBarrier()); - loadBarrierSNs.erase(barr_sn); - } - if (Debug::MemDepUnit) { - const char *barrier_type = nullptr; - if (inst->isWriteBarrier() && inst->isReadBarrier()) - barrier_type = "Memory"; - else if (inst->isWriteBarrier()) - barrier_type = "Write"; - else if (inst->isReadBarrier()) - barrier_type = "Read"; - - if (barrier_type) { - DPRINTF(MemDepUnit, "%s barrier completed: %s SN:%lli\n", - barrier_type, inst->pcState(), inst->seqNum); - } - } -} - -template -void -MemDepUnit::wakeDependents(const O3DynInstPtr &inst) -{ - // Only stores, atomics and barriers have dependents. - if (!inst->isStore() && !inst->isAtomic() && !inst->isReadBarrier() && - !inst->isWriteBarrier() && !inst->isHtmCmd()) { - return; - } - - MemDepEntryPtr inst_entry = findInHash(inst); - - for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) { - MemDepEntryPtr woken_inst = inst_entry->dependInsts[i]; - - if (!woken_inst->inst) { - // Potentially removed mem dep entries could be on this list - continue; - } - - DPRINTF(MemDepUnit, "Waking up a dependent inst, " - "[sn:%lli].\n", - woken_inst->inst->seqNum); - - assert(woken_inst->memDeps > 0); - woken_inst->memDeps -= 1; - - if ((woken_inst->memDeps == 0) && - woken_inst->regsReady && - !woken_inst->squashed) { - moveToReady(woken_inst); - } - } - - inst_entry->dependInsts.clear(); -} - -template -void -MemDepUnit::squash(const InstSeqNum &squashed_num, - ThreadID tid) -{ - if (!instsToReplay.empty()) { - ListIt replay_it = instsToReplay.begin(); - while (replay_it != instsToReplay.end()) { - if ((*replay_it)->threadNumber == tid && - (*replay_it)->seqNum > squashed_num) { - instsToReplay.erase(replay_it++); - } else { - ++replay_it; - } - } - } - - ListIt squash_it = instList[tid].end(); - --squash_it; - - MemDepHashIt hash_it; - - while (!instList[tid].empty() && - (*squash_it)->seqNum > squashed_num) { - - DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n", - (*squash_it)->seqNum); - - loadBarrierSNs.erase((*squash_it)->seqNum); - - storeBarrierSNs.erase((*squash_it)->seqNum); - - hash_it = memDepHash.find((*squash_it)->seqNum); - - assert(hash_it != memDepHash.end()); - - (*hash_it).second->squashed = true; - - (*hash_it).second = NULL; - - memDepHash.erase(hash_it); -#ifdef DEBUG - MemDepEntry::memdep_erase++; -#endif - - instList[tid].erase(squash_it--); - } - - // Tell the dependency predictor to squash as well. - depPred.squash(squashed_num, tid); -} - -template -void -MemDepUnit::violation(const O3DynInstPtr &store_inst, - const O3DynInstPtr &violating_load) -{ - DPRINTF(MemDepUnit, "Passing violating PCs to store sets," - " load: %#x, store: %#x\n", violating_load->instAddr(), - store_inst->instAddr()); - // Tell the memory dependence unit of the violation. - depPred.violation(store_inst->instAddr(), violating_load->instAddr()); -} - -template -void -MemDepUnit::issue(const O3DynInstPtr &inst) -{ - DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n", - inst->instAddr(), inst->seqNum); - - depPred.issued(inst->instAddr(), inst->seqNum, inst->isStore()); -} - -template -typename MemDepUnit::MemDepEntryPtr & -MemDepUnit::findInHash(const O3DynInstConstPtr &inst) -{ - MemDepHashIt hash_it = memDepHash.find(inst->seqNum); - - assert(hash_it != memDepHash.end()); - - return (*hash_it).second; -} - -template -void -MemDepUnit::moveToReady(MemDepEntryPtr &woken_inst_entry) -{ - DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] " - "to the ready list.\n", woken_inst_entry->inst->seqNum); - - assert(!woken_inst_entry->squashed); - - iqPtr->addReadyMemInst(woken_inst_entry->inst); -} - - -template -void -MemDepUnit::dumpLists() -{ - for (ThreadID tid = 0; tid < O3MaxThreads; tid++) { - cprintf("Instruction list %i size: %i\n", - tid, instList[tid].size()); - - ListIt inst_list_it = instList[tid].begin(); - int num = 0; - - while (inst_list_it != instList[tid].end()) { - cprintf("Instruction:%i\nPC: %s\n[sn:%llu]\n[tid:%i]\nIssued:%i\n" - "Squashed:%i\n\n", - num, (*inst_list_it)->pcState(), - (*inst_list_it)->seqNum, - (*inst_list_it)->threadNumber, - (*inst_list_it)->isIssued(), - (*inst_list_it)->isSquashed()); - inst_list_it++; - ++num; - } - } - - cprintf("Memory dependence hash size: %i\n", memDepHash.size()); - -#ifdef DEBUG - cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); -#endif -} - -#endif//__CPU_O3_MEM_DEP_UNIT_IMPL_HH__