The DEBUG macro is not part of any compiler standard (unlike NDEBUG, which elides assertions). It is only meant to differentiate gem5.debug from the .fast and .opt builds. gem5 developers have used it to insert helper code that is supposed to aid the debugging process in case anything goes wrong. Such a generic name is likely to clash with other libraries linked with gem5; DRAMSim is one example. Rather than using undef tricks, we just inject a GEM5_DEBUG macro for gem5.debug builds. Change-Id: Ie913ca30da615bd0075277a260bbdbc397b7ec87 Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69079 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
460 lines
15 KiB
C++
460 lines
15 KiB
C++
/*
|
|
* Copyright (c) 2010-2011, 2021 ARM Limited
|
|
* All rights reserved
|
|
*
|
|
* The license below extends only to copyright in the software and shall
|
|
* not be construed as granting a license to any other intellectual
|
|
* property including but not limited to intellectual property relating
|
|
* to a hardware implementation of the functionality of the software
|
|
* licensed hereunder. You may use the software subject to the license
|
|
* terms below provided that you ensure that this notice is replicated
|
|
* unmodified and in its entirety in all distributions of the software,
|
|
* modified or unmodified, in source code or in binary form.
|
|
*
|
|
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met: redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer;
|
|
* redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution;
|
|
* neither the name of the copyright holders nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "cpu/o3/dyn_inst.hh"

#include <algorithm>
#include <ios>

#include "base/intmath.hh"
#include "debug/DynInst.hh"
#include "debug/IQ.hh"
#include "debug/O3PipeView.hh"
|
|
|
|
namespace gem5
|
|
{
|
|
|
|
namespace o3
|
|
{
|
|
|
|
/**
 * Primary constructor. Adopts the side arrays described by "arrays",
 * clears the ready-source bytes, resets the status and instruction flags
 * and, in non-NDEBUG / GEM5_DEBUG builds, registers the instruction with
 * the CPU's debug bookkeeping.
 *
 * @param arrays Sizes of, and pointers to, the source/destination arrays
 *               this instruction uses.
 * @param static_inst Static instruction stored in staticInst.
 * @param _macroop Static instruction stored in macroop.
 * @param seq_num Sequence number stored in seqNum.
 * @param _cpu CPU stored in cpu. May be nullptr (the three-argument
 *             constructor passes nullptr); the debug bookkeeping is then
 *             skipped instead of dereferencing a null pointer.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop, InstSeqNum seq_num, CPU *_cpu)
    : seqNum(seq_num), staticInst(static_inst), cpu(_cpu),
      _numSrcs(arrays.numSrcs), _numDests(arrays.numDests),
      _flatDestIdx(arrays.flatDestIdx), _destIdx(arrays.destIdx),
      _prevDestIdx(arrays.prevDestIdx), _srcIdx(arrays.srcIdx),
      _readySrcIdx(arrays.readySrcIdx), macroop(_macroop)
{
    // The ready-source storage is (numSrcs() + 7) / 8 bytes long.
    std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);

    status.reset();

    instFlags.reset();
    instFlags[RecordResult] = true;
    instFlags[Predicate] = true;
    instFlags[MemAccPredicate] = true;

#ifndef NDEBUG
    // The instruction counter lives in the CPU, so there is nothing to
    // count against when no CPU was supplied.
    if (cpu) {
        ++cpu->instcount;

        if (cpu->instcount > 1500) {
#ifdef GEM5_DEBUG
            // Dump diagnostics before the assertion below fires.
            cpu->dumpInsts();
            dumpSNList();
#endif
            assert(cpu->instcount <= 1500);
        }

        DPRINTF(DynInst,
            "DynInst: [sn:%lli] Instruction created. Instcount for %s = %i\n",
            seqNum, cpu->name(), cpu->instcount);
    }
#endif

#ifdef GEM5_DEBUG
    // Remember this sequence number until the destructor erases it.
    if (cpu)
        cpu->snList.insert(seqNum);
#endif
}
|
|
|
|
/**
 * Constructor that also takes the instruction's PC state and predicted PC
 * state. All other initialization is delegated to the primary constructor.
 *
 * @param _pc PC state stored into the pc member.
 * @param pred_pc PC state stored into the predPC member.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
        const StaticInstPtr &_macroop, const PCStateBase &_pc,
        const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *_cpu)
    : DynInst(arrays, static_inst, _macroop, seq_num, _cpu)
{
    // Record the current and the predicted PC state.
    set(pc, _pc);
    set(predPC, pred_pc);
}
|
|
|
|
/**
 * Constructor for an instruction with no sequence number and no CPU:
 * delegates to the primary constructor with a sequence number of 0 and a
 * null CPU pointer.
 */
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
        const StaticInstPtr &_macroop)
    : DynInst(arrays, _staticInst, _macroop, /* seq_num */ 0,
              /* _cpu */ nullptr)
{
}
|
|
|
|
/*
|
|
* This custom "new" operator uses the default "new" operator to allocate space
|
|
* for a DynInst, but also pads out the number of bytes to make room for some
|
|
* extra structures the DynInst needs. We save time and improve performance by
|
|
* only going to the heap once to get space for all these structures.
|
|
*
|
|
* When a DynInst is allocated with new, the compiler will call this "new"
|
|
* operator with "count" set to the number of bytes it needs to store the
|
|
* DynInst. We ultimately call into the default new operator to get those
|
|
* bytes, but before we do, we pad out "count" so that there will be extra
|
|
* space for some structures the DynInst needs. We take into account both the
|
|
* absolute size of these structures, and also what alignment they need.
|
|
*
|
|
* Once we've gotten a buffer large enough to hold the DynInst itself and these
|
|
* extra structures, we construct the extra bits using placement new. This
|
|
* constructs the structures in place in the space we created for them.
|
|
*
|
|
* Next, we return the buffer as the result of our operator. The compiler takes
|
|
* that buffer and constructs the DynInst in the beginning of it using the
|
|
* DynInst constructor.
|
|
*
|
|
* To avoid having to calculate where these extra structures are twice, once
|
|
* when making room for them and initializing them, and then once again in the
|
|
* DynInst constructor, we also pass in a structure called "arrays" which holds
|
|
* pointers to them. The fields of "arrays" are initialized in this operator,
|
|
* and are then consumed in the DynInst constructor.
|
|
*/
|
|
void *
|
|
DynInst::operator new(size_t count, Arrays &arrays)
|
|
{
|
|
// Convenience variables for brevity.
|
|
const auto num_dests = arrays.numDests;
|
|
const auto num_srcs = arrays.numSrcs;
|
|
|
|
// Figure out where everything will go.
|
|
uintptr_t inst = 0;
|
|
size_t inst_size = count;
|
|
|
|
uintptr_t flat_dest_idx = roundUp(inst + inst_size, alignof(RegId));
|
|
size_t flat_dest_idx_size = sizeof(*arrays.flatDestIdx) * num_dests;
|
|
|
|
uintptr_t dest_idx =
|
|
roundUp(flat_dest_idx + flat_dest_idx_size, alignof(PhysRegIdPtr));
|
|
size_t dest_idx_size = sizeof(*arrays.destIdx) * num_dests;
|
|
|
|
uintptr_t prev_dest_idx =
|
|
roundUp(dest_idx + dest_idx_size, alignof(PhysRegIdPtr));
|
|
size_t prev_dest_idx_size = sizeof(*arrays.prevDestIdx) * num_dests;
|
|
|
|
uintptr_t src_idx =
|
|
roundUp(prev_dest_idx + prev_dest_idx_size, alignof(PhysRegIdPtr));
|
|
size_t src_idx_size = sizeof(*arrays.srcIdx) * num_srcs;
|
|
|
|
uintptr_t ready_src_idx =
|
|
roundUp(src_idx + src_idx_size, alignof(uint8_t));
|
|
size_t ready_src_idx_size =
|
|
sizeof(*arrays.readySrcIdx) * ((num_srcs + 7) / 8);
|
|
|
|
// Figure out how much space we need in total.
|
|
size_t total_size = ready_src_idx + ready_src_idx_size;
|
|
|
|
// Actually allocate it.
|
|
uint8_t *buf = (uint8_t *)::operator new(total_size);
|
|
|
|
// Fill in "arrays" with pointers to all the arrays.
|
|
arrays.flatDestIdx = (RegId *)(buf + flat_dest_idx);
|
|
arrays.destIdx = (PhysRegIdPtr *)(buf + dest_idx);
|
|
arrays.prevDestIdx = (PhysRegIdPtr *)(buf + prev_dest_idx);
|
|
arrays.srcIdx = (PhysRegIdPtr *)(buf + src_idx);
|
|
arrays.readySrcIdx = (uint8_t *)(buf + ready_src_idx);
|
|
|
|
// Initialize all the extra components.
|
|
new (arrays.flatDestIdx) RegId[num_dests];
|
|
new (arrays.destIdx) PhysRegIdPtr[num_dests];
|
|
new (arrays.prevDestIdx) PhysRegIdPtr[num_dests];
|
|
new (arrays.srcIdx) PhysRegIdPtr[num_srcs];
|
|
new (arrays.readySrcIdx) uint8_t[num_srcs];
|
|
|
|
return buf;
|
|
}
|
|
|
|
// Because of the custom "new" operator that allocates more bytes than the
|
|
// size of the DynInst object, AddressSanitizer throw new-delete-type-mismatch.
|
|
// Adding a custom delete function is enough to shut down this false positive
|
|
void
|
|
DynInst::operator delete(void *ptr)
|
|
{
|
|
::operator delete(ptr);
|
|
}
|
|
|
|
/**
 * Destructor. Destroys the side arrays that share this object's buffer,
 * emits the O3PipeView trace records when that debug flag is enabled,
 * releases memData and traceData, clears the fault and undoes the debug
 * bookkeeping done by the constructor.
 *
 * The debug bookkeeping is skipped when cpu is nullptr (as it is for
 * instructions built with the three-argument constructor) rather than
 * dereferencing a null pointer.
 */
DynInst::~DynInst()
{
    /*
     * The buffer this DynInst occupies also holds some of the structures it
     * points to. We need to call their destructors manually to make sure that
     * they're cleaned up appropriately, but we don't need to free their memory
     * explicitly since that's part of the DynInst's buffer and is already
     * going to be freed as part of deleting the DynInst.
     */
    for (int i = 0; i < _numDests; i++) {
        _flatDestIdx[i].~RegId();
        _destIdx[i].~PhysRegIdPtr();
        _prevDestIdx[i].~PhysRegIdPtr();
    }

    for (int i = 0; i < _numSrcs; i++)
        _srcIdx[i].~PhysRegIdPtr();

    for (int i = 0; i < ((_numSrcs + 7) / 8); i++)
        _readySrcIdx[i].~uint8_t();

#if TRACING_ON
    if (debug::O3PipeView) {
        Tick fetch = fetchTick;
        // fetchTick can be -1 if the instruction fetched outside the trace
        // window.
        if (fetch != -1) {
            Tick val;
            // Print info needed by the pipeline activity viewer.
            DPRINTFR(O3PipeView, "O3PipeView:fetch:%llu:0x%08llx:%d:%llu:%s\n",
                     fetch,
                     pcState().instAddr(),
                     pcState().microPC(),
                     seqNum,
                     staticInst->disassemble(pcState().instAddr()));

            // Each stage tick is added to the fetch tick before printing;
            // a stage tick of -1 is printed as 0.
            val = (decodeTick == -1) ? 0 : fetch + decodeTick;
            DPRINTFR(O3PipeView, "O3PipeView:decode:%llu\n", val);
            val = (renameTick == -1) ? 0 : fetch + renameTick;
            DPRINTFR(O3PipeView, "O3PipeView:rename:%llu\n", val);
            val = (dispatchTick == -1) ? 0 : fetch + dispatchTick;
            DPRINTFR(O3PipeView, "O3PipeView:dispatch:%llu\n", val);
            val = (issueTick == -1) ? 0 : fetch + issueTick;
            DPRINTFR(O3PipeView, "O3PipeView:issue:%llu\n", val);
            val = (completeTick == -1) ? 0 : fetch + completeTick;
            DPRINTFR(O3PipeView, "O3PipeView:complete:%llu\n", val);
            val = (commitTick == -1) ? 0 : fetch + commitTick;

            Tick valS = (storeTick == -1) ? 0 : fetch + storeTick;
            DPRINTFR(O3PipeView, "O3PipeView:retire:%llu:store:%llu\n",
                     val, valS);
        }
    }
#endif

    delete [] memData;
    delete traceData;
    fault = NoFault;

#ifndef NDEBUG
    // Mirror of the constructor: the counter lives in the CPU, so skip it
    // when there is no CPU.
    if (cpu) {
        --cpu->instcount;

        DPRINTF(DynInst,
            "DynInst: [sn:%lli] Instruction destroyed. Instcount for %s = %i\n",
            seqNum, cpu->name(), cpu->instcount);
    }
#endif
#ifdef GEM5_DEBUG
    if (cpu)
        cpu->snList.erase(seqNum);
#endif
}
|
|
|
|
|
|
#ifdef GEM5_DEBUG
/**
 * Print one line for every sequence number currently held in the CPU's
 * snList, prefixed with a running index that starts at 0.
 */
void
DynInst::dumpSNList()
{
    int idx = 0;
    for (const auto &seq : cpu->snList) {
        cprintf("%i: [sn:%lli] not destroyed\n", idx, seq);
        ++idx;
    }
}
#endif
|
|
|
|
void
|
|
DynInst::dump()
|
|
{
|
|
cprintf("T%d : %#08d `", threadNumber, pc->instAddr());
|
|
std::cout << staticInst->disassemble(pc->instAddr());
|
|
cprintf("'\n");
|
|
}
|
|
|
|
void
|
|
DynInst::dump(std::string &outstring)
|
|
{
|
|
std::ostringstream s;
|
|
s << "T" << threadNumber << " : 0x" << pc->instAddr() << " "
|
|
<< staticInst->disassemble(pc->instAddr());
|
|
|
|
outstring = s.str();
|
|
}
|
|
|
|
void
|
|
DynInst::markSrcRegReady()
|
|
{
|
|
DPRINTF(IQ, "[sn:%lli] has %d ready out of %d sources. RTI %d)\n",
|
|
seqNum, readyRegs+1, numSrcRegs(), readyToIssue());
|
|
if (++readyRegs == numSrcRegs()) {
|
|
setCanIssue();
|
|
}
|
|
}
|
|
|
|
void
|
|
DynInst::markSrcRegReady(RegIndex src_idx)
|
|
{
|
|
readySrcIdx(src_idx, true);
|
|
markSrcRegReady();
|
|
}
|
|
|
|
|
|
void
|
|
DynInst::setSquashed()
|
|
{
|
|
status.set(Squashed);
|
|
|
|
if (!isPinnedRegsRenamed() || isPinnedRegsSquashDone())
|
|
return;
|
|
|
|
// This inst has been renamed already so it may go through rename
|
|
// again (e.g. if the squash is due to memory access order violation).
|
|
// Reset the write counters for all pinned destination register to ensure
|
|
// that they are in a consistent state for a possible re-rename. This also
|
|
// ensures that dest regs will be pinned to the same phys register if
|
|
// re-rename happens.
|
|
for (int idx = 0; idx < numDestRegs(); idx++) {
|
|
PhysRegIdPtr phys_dest_reg = renamedDestIdx(idx);
|
|
if (phys_dest_reg->isPinned()) {
|
|
phys_dest_reg->incrNumPinnedWrites();
|
|
if (isPinnedRegsWritten())
|
|
phys_dest_reg->incrNumPinnedWritesToComplete();
|
|
}
|
|
}
|
|
setPinnedRegsSquashDone();
|
|
}
|
|
|
|
/**
 * Execute the static instruction on behalf of this dynamic instruction.
 *
 * @return The fault produced by the static instruction's execute(), which
 *         is also stored in the fault member.
 */
Fault
DynInst::execute()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC). Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->execute(this, traceData);

    // Put the thread's flag back the way we found it.
    thread->noSquashFromTC = saved_no_squash;

    return fault;
}
|
|
|
|
/**
 * Run the static instruction's initiateAcc() for this dynamic instruction.
 *
 * @return The fault produced by initiateAcc(), which is also stored in the
 *         fault member.
 */
Fault
DynInst::initiateAcc()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC). Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    fault = staticInst->initiateAcc(this, traceData);

    // Put the thread's flag back the way we found it.
    thread->noSquashFromTC = saved_no_squash;

    return fault;
}
|
|
|
|
/**
 * Run the static instruction's completeAcc() with the given packet.
 *
 * @param pkt Packet handed to the static instruction's completeAcc().
 * @return The fault produced by completeAcc(), which is also stored in the
 *         fault member.
 */
Fault
DynInst::completeAcc(PacketPtr pkt)
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC). Fix this.
    const bool saved_no_squash = thread->noSquashFromTC;
    thread->noSquashFromTC = true;

    // With a checker attached, copy a store conditional's extra data from
    // the packet's request into the request kept for verification.
    if (cpu->checker && isStoreConditional())
        reqToVerify->setExtraData(pkt->req->getExtraData());

    fault = staticInst->completeAcc(pkt, this, traceData);

    // Put the thread's flag back the way we found it.
    thread->noSquashFromTC = saved_no_squash;

    return fault;
}
|
|
|
|
void
|
|
DynInst::trap(const Fault &fault)
|
|
{
|
|
cpu->trap(fault, threadNumber, staticInst);
|
|
}
|
|
|
|
/**
 * Push a load request for this instruction to the CPU.
 *
 * @param addr Address passed on to pushRequest().
 * @param size Access size; must equal byte_enable.size().
 * @param flags Request flags passed on to pushRequest().
 * @param byte_enable Per-byte enable mask passed on to pushRequest().
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemRead(Addr addr, unsigned size, Request::Flags flags,
        const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);

    // The null arguments sit in the slots where writeMem() passes its
    // data and result pointers and initiateMemAMO() passes its functor.
    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* ld */ true,
            /* data */ nullptr,
            size,
            addr,
            flags,
            /* res */ nullptr,
            /* amo_op */ nullptr,
            byte_enable);
}
|
|
|
|
/**
 * Push a memory management command to the CPU as a load-type request of
 * fixed size 8 at address 0 with every byte enabled.
 *
 * @param flags Request flags passed on to pushRequest().
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemMgmtCmd(Request::Flags flags)
{
    const unsigned int cmd_size = 8;

    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* ld */ true,
            /* data */ nullptr,
            cmd_size,
            /* addr */ 0x0ul,
            flags,
            /* res */ nullptr,
            /* amo_op */ nullptr,
            std::vector<bool>(cmd_size, true));
}
|
|
|
|
/**
 * Push a store request for this instruction to the CPU.
 *
 * @param data Pointer to the store data, passed on to pushRequest().
 * @param size Access size; must equal byte_enable.size().
 * @param addr Address passed on to pushRequest().
 * @param flags Request flags passed on to pushRequest().
 * @param res Result pointer passed on to pushRequest().
 * @param byte_enable Per-byte enable mask passed on to pushRequest().
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::writeMem(uint8_t *data, unsigned size, Addr addr,
        Request::Flags flags, uint64_t *res,
        const std::vector<bool> &byte_enable)
{
    assert(byte_enable.size() == size);

    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* st */ false,
            data,
            size,
            addr,
            flags,
            res,
            /* amo_op */ nullptr,
            byte_enable);
}
|
|
|
|
/**
 * Push an atomic memory operation for this instruction to the CPU with
 * every byte enabled.
 *
 * @param addr Address passed on to pushRequest().
 * @param size Access size, also the length of the all-true byte mask.
 * @param flags Request flags passed on to pushRequest().
 * @param amo_op Atomic operation functor; ownership is moved into the
 *               pushRequest() call.
 * @return The fault returned by the CPU's pushRequest().
 */
Fault
DynInst::initiateMemAMO(Addr addr, unsigned size, Request::Flags flags,
        AtomicOpFunctorPtr amo_op)
{
    // atomic memory instructions do not have data to be written to memory yet
    // since the atomic operations will be executed directly in cache/memory.
    // Therefore, its `data` field is nullptr.
    // Atomic memory requests need to carry their `amo_op` fields to cache/
    // memory
    return cpu->pushRequest(
            dynamic_cast<DynInstPtr::PtrType>(this),
            /* atomic */ false,
            /* data */ nullptr,
            size,
            addr,
            flags,
            /* res */ nullptr,
            std::move(amo_op),
            std::vector<bool>(size, true));
}
|
|
|
|
} // namespace o3
|
|
} // namespace gem5
|