cpu-o3: Refactor how registers are handled in the DynInst class.
The "Regs" structure in the DynInst class was using placement new to allocate register arrays in a dynamically allocated blob which can be resized based on the number of source and destination registers. Unfortunately, it was assumed that the alignment of the components of that structure would work out because they were ordered from largest to smallest, which should imply largest alignment to smallest.

This change instead uses an overloaded new operator to allocate extra memory for the DynInst itself, and then initialize arrays within that extra space. The DynInst class then gets pointers to the arrays so it can access them.

This has the benefit that only one chunk of memory is allocated, instead of one for the DynInst and then a second for the arrays. Also, this new version uses the alignof operator to figure out what alignment is needed for each array, which should avoid any undefined behavior. The new-ing, initialization, destructing, and delete-ing are also more carefully orchestrated.

Hopefully one or both of these will squash potential memory management bugs.

Change-Id: Id2fa090b53909f14a8cb39801e9930d4608e42f7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/52485
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -1300,8 +1300,8 @@ Commit::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
|
||||
|
||||
// Update the commit rename map
|
||||
for (int i = 0; i < head_inst->numDestRegs(); i++) {
|
||||
renameMap[tid]->setEntry(head_inst->regs.flattenedDestIdx(i),
|
||||
head_inst->regs.renamedDestIdx(i));
|
||||
renameMap[tid]->setEntry(head_inst->flattenedDestIdx(i),
|
||||
head_inst->renamedDestIdx(i));
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "debug/DynInst.hh"
|
||||
#include "debug/IQ.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
@@ -52,13 +53,15 @@ namespace gem5
|
||||
namespace o3
|
||||
{
|
||||
|
||||
DynInst::DynInst(const StaticInstPtr &static_inst,
|
||||
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
|
||||
const StaticInstPtr &_macroop, InstSeqNum seq_num, CPU *_cpu)
|
||||
: seqNum(seq_num), staticInst(static_inst), cpu(_cpu),
|
||||
regs(staticInst->numSrcRegs(), staticInst->numDestRegs()),
|
||||
macroop(_macroop)
|
||||
_numSrcs(arrays.numSrcs), _numDests(arrays.numDests),
|
||||
_flatDestIdx(arrays.flatDestIdx), _destIdx(arrays.destIdx),
|
||||
_prevDestIdx(arrays.prevDestIdx), _srcIdx(arrays.srcIdx),
|
||||
_readySrcIdx(arrays.readySrcIdx), macroop(_macroop)
|
||||
{
|
||||
regs.init();
|
||||
std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);
|
||||
|
||||
status.reset();
|
||||
|
||||
@@ -89,22 +92,122 @@ DynInst::DynInst(const StaticInstPtr &static_inst,
|
||||
|
||||
}
|
||||
|
||||
DynInst::DynInst(const StaticInstPtr &static_inst,
|
||||
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
|
||||
const StaticInstPtr &_macroop, const PCStateBase &_pc,
|
||||
const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *_cpu)
|
||||
: DynInst(static_inst, _macroop, seq_num, _cpu)
|
||||
: DynInst(arrays, static_inst, _macroop, seq_num, _cpu)
|
||||
{
|
||||
set(pc, _pc);
|
||||
set(predPC, pred_pc);
|
||||
}
|
||||
|
||||
DynInst::DynInst(const StaticInstPtr &_staticInst,
|
||||
DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
|
||||
const StaticInstPtr &_macroop)
|
||||
: DynInst(_staticInst, _macroop, 0, nullptr)
|
||||
: DynInst(arrays, _staticInst, _macroop, 0, nullptr)
|
||||
{}
|
||||
|
||||
/*
|
||||
* This custom "new" operator uses the default "new" operator to allocate space
|
||||
* for a DynInst, but also pads out the number of bytes to make room for some
|
||||
* extra structures the DynInst needs. We save time and improve performance by
|
||||
* only going to the heap once to get space for all these structures.
|
||||
*
|
||||
* When a DynInst is allocated with new, the compiler will call this "new"
|
||||
* operator with "count" set to the number of bytes it needs to store the
|
||||
* DynInst. We ultimately call into the default new operator to get those
|
||||
* bytes, but before we do, we pad out "count" so that there will be extra
|
||||
* space for some structures the DynInst needs. We take into account both the
|
||||
* absolute size of these structures, and also what alignment they need.
|
||||
*
|
||||
* Once we've gotten a buffer large enough to hold the DynInst itself and these
|
||||
* extra structures, we construct the extra bits using placement new. This
|
||||
* constructs the structures in place in the space we created for them.
|
||||
*
|
||||
* Next, we return the buffer as the result of our operator. The compiler takes
|
||||
* that buffer and constructs the DynInst in the beginning of it using the
|
||||
* DynInst constructor.
|
||||
*
|
||||
* To avoid having to calculate where these extra structures are twice, once
|
||||
* when making room for them and initializing them, and then once again in the
|
||||
* DynInst constructor, we also pass in a structure called "arrays" which holds
|
||||
* pointers to them. The fields of "arrays" are initialized in this operator,
|
||||
* and are then consumed in the DynInst constructor.
|
||||
*/
|
||||
void *
|
||||
DynInst::operator new(size_t count, Arrays &arrays)
|
||||
{
|
||||
// Convenience variables for brevity.
|
||||
const auto num_dests = arrays.numDests;
|
||||
const auto num_srcs = arrays.numSrcs;
|
||||
|
||||
// Figure out where everything will go.
|
||||
uintptr_t inst = 0;
|
||||
size_t inst_size = count;
|
||||
|
||||
uintptr_t flat_dest_idx = roundUp(inst + inst_size, alignof(RegId));
|
||||
size_t flat_dest_idx_size = sizeof(*arrays.flatDestIdx) * num_dests;
|
||||
|
||||
uintptr_t dest_idx =
|
||||
roundUp(flat_dest_idx + flat_dest_idx_size, alignof(PhysRegIdPtr));
|
||||
size_t dest_idx_size = sizeof(*arrays.destIdx) * num_dests;
|
||||
|
||||
uintptr_t prev_dest_idx =
|
||||
roundUp(dest_idx + dest_idx_size, alignof(PhysRegIdPtr));
|
||||
size_t prev_dest_idx_size = sizeof(*arrays.prevDestIdx) * num_dests;
|
||||
|
||||
uintptr_t src_idx =
|
||||
roundUp(prev_dest_idx + prev_dest_idx_size, alignof(PhysRegIdPtr));
|
||||
size_t src_idx_size = sizeof(*arrays.srcIdx) * num_srcs;
|
||||
|
||||
uintptr_t ready_src_idx =
|
||||
roundUp(src_idx + src_idx_size, alignof(uint8_t));
|
||||
size_t ready_src_idx_size =
|
||||
sizeof(*arrays.readySrcIdx) * ((num_srcs + 7) / 8);
|
||||
|
||||
// Figure out how much space we need in total.
|
||||
size_t total_size = ready_src_idx + ready_src_idx_size;
|
||||
|
||||
// Actually allocate it.
|
||||
uint8_t *buf = (uint8_t *)::operator new(total_size);
|
||||
|
||||
// Fill in "arrays" with pointers to all the arrays.
|
||||
arrays.flatDestIdx = (RegId *)(buf + flat_dest_idx);
|
||||
arrays.destIdx = (PhysRegIdPtr *)(buf + dest_idx);
|
||||
arrays.prevDestIdx = (PhysRegIdPtr *)(buf + prev_dest_idx);
|
||||
arrays.srcIdx = (PhysRegIdPtr *)(buf + src_idx);
|
||||
arrays.readySrcIdx = (uint8_t *)(buf + ready_src_idx);
|
||||
|
||||
// Initialize all the extra components.
|
||||
new (arrays.flatDestIdx) RegId[num_dests];
|
||||
new (arrays.destIdx) PhysRegIdPtr[num_dests];
|
||||
new (arrays.prevDestIdx) PhysRegIdPtr[num_dests];
|
||||
new (arrays.srcIdx) PhysRegIdPtr[num_srcs];
|
||||
new (arrays.readySrcIdx) uint8_t[num_srcs];
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
DynInst::~DynInst()
|
||||
{
|
||||
/*
|
||||
* The buffer this DynInst occupies also holds some of the structures it
|
||||
* points to. We need to call their destructors manually to make sure that
|
||||
* they're cleaned up appropriately, but we don't need to free their memory
|
||||
* explicitly since that's part of the DynInst's buffer and is already
|
||||
* going to be freed as part of deleting the DynInst.
|
||||
*/
|
||||
for (int i = 0; i < _numDests; i++) {
|
||||
_flatDestIdx[i].~RegId();
|
||||
_destIdx[i].~PhysRegIdPtr();
|
||||
_prevDestIdx[i].~PhysRegIdPtr();
|
||||
}
|
||||
|
||||
for (int i = 0; i < _numSrcs; i++)
|
||||
_srcIdx[i].~PhysRegIdPtr();
|
||||
|
||||
for (int i = 0; i < ((_numSrcs + 7) / 8); i++)
|
||||
_readySrcIdx[i].~uint8_t();
|
||||
|
||||
#if TRACING_ON
|
||||
if (debug::O3PipeView) {
|
||||
Tick fetch = fetchTick;
|
||||
@@ -202,7 +305,7 @@ DynInst::markSrcRegReady()
|
||||
void
|
||||
DynInst::markSrcRegReady(RegIndex src_idx)
|
||||
{
|
||||
regs.readySrcIdx(src_idx, true);
|
||||
readySrcIdx(src_idx, true);
|
||||
markSrcRegReady();
|
||||
}
|
||||
|
||||
@@ -222,7 +325,7 @@ DynInst::setSquashed()
|
||||
// ensures that dest regs will be pinned to the same phys register if
|
||||
// re-rename happens.
|
||||
for (int idx = 0; idx < numDestRegs(); idx++) {
|
||||
PhysRegIdPtr phys_dest_reg = regs.renamedDestIdx(idx);
|
||||
PhysRegIdPtr phys_dest_reg = renamedDestIdx(idx);
|
||||
if (phys_dest_reg->isPinned()) {
|
||||
phys_dest_reg->incrNumPinnedWrites();
|
||||
if (isPinnedRegsWritten())
|
||||
|
||||
@@ -83,13 +83,31 @@ class DynInst : public ExecContext, public RefCounted
|
||||
// The list of instructions iterator type.
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
struct Arrays
|
||||
{
|
||||
size_t numSrcs;
|
||||
size_t numDests;
|
||||
|
||||
RegId *flatDestIdx;
|
||||
PhysRegIdPtr *destIdx;
|
||||
PhysRegIdPtr *prevDestIdx;
|
||||
PhysRegIdPtr *srcIdx;
|
||||
uint8_t *readySrcIdx;
|
||||
};
|
||||
|
||||
static void *operator new(size_t count, Arrays &arrays);
|
||||
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
|
||||
&macroop, const PCStateBase &pc, const PCStateBase &pred_pc,
|
||||
InstSeqNum seq_num, CPU *cpu);
|
||||
DynInst(const Arrays &arrays, const StaticInstPtr &staticInst,
|
||||
const StaticInstPtr &macroop, InstSeqNum seq_num, CPU *cpu);
|
||||
|
||||
DynInst(const Arrays &arrays, const StaticInstPtr &staticInst,
|
||||
const StaticInstPtr &macroop, const PCStateBase &pc,
|
||||
const PCStateBase &pred_pc, InstSeqNum seq_num, CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
DynInst(const StaticInstPtr &_staticInst, const StaticInstPtr &_macroop);
|
||||
DynInst(const Arrays &arrays, const StaticInstPtr &_staticInst,
|
||||
const StaticInstPtr &_macroop);
|
||||
|
||||
~DynInst();
|
||||
|
||||
@@ -197,165 +215,103 @@ class DynInst : public ExecContext, public RefCounted
|
||||
*/
|
||||
std::vector<short> _destMiscRegIdx;
|
||||
|
||||
/**
|
||||
* Collect register related information into a single struct. The number of
|
||||
* source and destination registers can vary, and storage for information
|
||||
* about them needs to be allocated dynamically. This class figures out
|
||||
* how much space is needed and allocates it all at once, and then
|
||||
* trivially divvies it up for each type of per-register array.
|
||||
*/
|
||||
struct Regs
|
||||
{
|
||||
private:
|
||||
size_t _numSrcs;
|
||||
size_t _numDests;
|
||||
size_t _numSrcs;
|
||||
size_t _numDests;
|
||||
|
||||
using BackingStorePtr = std::unique_ptr<uint8_t[]>;
|
||||
using BufCursor = BackingStorePtr::pointer;
|
||||
// Flattened register index of the destination registers of this
|
||||
// instruction.
|
||||
RegId *_flatDestIdx;
|
||||
|
||||
BackingStorePtr buf;
|
||||
// Physical register index of the destination registers of this
|
||||
// instruction.
|
||||
PhysRegIdPtr *_destIdx;
|
||||
|
||||
// Members should be ordered based on required alignment so that they
|
||||
// can be allocated contiguously.
|
||||
// Physical register index of the previous producers of the
|
||||
// architected destinations.
|
||||
PhysRegIdPtr *_prevDestIdx;
|
||||
|
||||
// Flattened register index of the destination registers of this
|
||||
// instruction.
|
||||
RegId *_flatDestIdx;
|
||||
// Physical register index of the source registers of this instruction.
|
||||
PhysRegIdPtr *_srcIdx;
|
||||
|
||||
// Physical register index of the destination registers of this
|
||||
// instruction.
|
||||
PhysRegIdPtr *_destIdx;
|
||||
|
||||
// Physical register index of the previous producers of the
|
||||
// architected destinations.
|
||||
PhysRegIdPtr *_prevDestIdx;
|
||||
|
||||
static inline size_t
|
||||
bytesForDests(size_t num)
|
||||
{
|
||||
return (sizeof(RegId) + 2 * sizeof(PhysRegIdPtr)) * num;
|
||||
}
|
||||
|
||||
// Physical register index of the source registers of this instruction.
|
||||
PhysRegIdPtr *_srcIdx;
|
||||
|
||||
// Whether or not the source register is ready, one bit per register.
|
||||
uint8_t *_readySrcIdx;
|
||||
|
||||
static inline size_t
|
||||
bytesForSources(size_t num)
|
||||
{
|
||||
return sizeof(PhysRegIdPtr) * num +
|
||||
sizeof(uint8_t) * ((num + 7) / 8);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline void
|
||||
allocate(T *&ptr, BufCursor &cur, size_t count)
|
||||
{
|
||||
ptr = new (cur) T[count];
|
||||
cur += sizeof(T) * count;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t numSrcs() const { return _numSrcs; }
|
||||
size_t numDests() const { return _numDests; }
|
||||
|
||||
void
|
||||
init()
|
||||
{
|
||||
std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);
|
||||
}
|
||||
|
||||
Regs(size_t srcs, size_t dests) : _numSrcs(srcs), _numDests(dests),
|
||||
buf(new uint8_t[bytesForSources(srcs) + bytesForDests(dests)])
|
||||
{
|
||||
BufCursor cur = buf.get();
|
||||
allocate(_flatDestIdx, cur, dests);
|
||||
allocate(_destIdx, cur, dests);
|
||||
allocate(_prevDestIdx, cur, dests);
|
||||
allocate(_srcIdx, cur, srcs);
|
||||
allocate(_readySrcIdx, cur, (srcs + 7) / 8);
|
||||
|
||||
init();
|
||||
}
|
||||
|
||||
// Returns the flattened register index of the idx'th destination
|
||||
// register.
|
||||
const RegId &
|
||||
flattenedDestIdx(int idx) const
|
||||
{
|
||||
return _flatDestIdx[idx];
|
||||
}
|
||||
|
||||
// Flattens a destination architectural register index into a logical
|
||||
// index.
|
||||
void
|
||||
flattenedDestIdx(int idx, const RegId &reg_id)
|
||||
{
|
||||
_flatDestIdx[idx] = reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the idx'th destination
|
||||
// register.
|
||||
PhysRegIdPtr
|
||||
renamedDestIdx(int idx) const
|
||||
{
|
||||
return _destIdx[idx];
|
||||
}
|
||||
|
||||
// Set the renamed dest register id.
|
||||
void
|
||||
renamedDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_destIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the previous physical
|
||||
// register that remapped to the same logical register index.
|
||||
PhysRegIdPtr
|
||||
prevDestIdx(int idx) const
|
||||
{
|
||||
return _prevDestIdx[idx];
|
||||
}
|
||||
|
||||
// Set the previous renamed dest register id.
|
||||
void
|
||||
prevDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_prevDestIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the i'th source register.
|
||||
PhysRegIdPtr
|
||||
renamedSrcIdx(int idx) const
|
||||
{
|
||||
return _srcIdx[idx];
|
||||
}
|
||||
|
||||
void
|
||||
renamedSrcIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_srcIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
bool
|
||||
readySrcIdx(int idx) const
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
return bits(byte, idx % 8);
|
||||
}
|
||||
|
||||
void
|
||||
readySrcIdx(int idx, bool ready)
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
replaceBits(byte, idx % 8, ready ? 1 : 0);
|
||||
}
|
||||
};
|
||||
// Whether or not the source register is ready, one bit per register.
|
||||
uint8_t *_readySrcIdx;
|
||||
|
||||
public:
|
||||
Regs regs;
|
||||
size_t numSrcs() const { return _numSrcs; }
|
||||
size_t numDests() const { return _numDests; }
|
||||
|
||||
// Returns the flattened register index of the idx'th destination
|
||||
// register.
|
||||
const RegId &
|
||||
flattenedDestIdx(int idx) const
|
||||
{
|
||||
return _flatDestIdx[idx];
|
||||
}
|
||||
|
||||
// Flattens a destination architectural register index into a logical
|
||||
// index.
|
||||
void
|
||||
flattenedDestIdx(int idx, const RegId &reg_id)
|
||||
{
|
||||
_flatDestIdx[idx] = reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the idx'th destination
|
||||
// register.
|
||||
PhysRegIdPtr
|
||||
renamedDestIdx(int idx) const
|
||||
{
|
||||
return _destIdx[idx];
|
||||
}
|
||||
|
||||
// Set the renamed dest register id.
|
||||
void
|
||||
renamedDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_destIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the previous physical
|
||||
// register that remapped to the same logical register index.
|
||||
PhysRegIdPtr
|
||||
prevDestIdx(int idx) const
|
||||
{
|
||||
return _prevDestIdx[idx];
|
||||
}
|
||||
|
||||
// Set the previous renamed dest register id.
|
||||
void
|
||||
prevDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_prevDestIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the i'th source register.
|
||||
PhysRegIdPtr
|
||||
renamedSrcIdx(int idx) const
|
||||
{
|
||||
return _srcIdx[idx];
|
||||
}
|
||||
|
||||
void
|
||||
renamedSrcIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_srcIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
bool
|
||||
readySrcIdx(int idx) const
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
return bits(byte, idx % 8);
|
||||
}
|
||||
|
||||
void
|
||||
readySrcIdx(int idx, bool ready)
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
replaceBits(byte, idx % 8, ready ? 1 : 0);
|
||||
}
|
||||
|
||||
/** The thread this instruction is from. */
|
||||
ThreadID threadNumber = 0;
|
||||
@@ -507,8 +463,8 @@ class DynInst : public ExecContext, public RefCounted
|
||||
renameDestReg(int idx, PhysRegIdPtr renamed_dest,
|
||||
PhysRegIdPtr previous_rename)
|
||||
{
|
||||
regs.renamedDestIdx(idx, renamed_dest);
|
||||
regs.prevDestIdx(idx, previous_rename);
|
||||
renamedDestIdx(idx, renamed_dest);
|
||||
prevDestIdx(idx, previous_rename);
|
||||
if (renamed_dest->isPinned())
|
||||
setPinnedRegsRenamed();
|
||||
}
|
||||
@@ -520,7 +476,7 @@ class DynInst : public ExecContext, public RefCounted
|
||||
void
|
||||
renameSrcReg(int idx, PhysRegIdPtr renamed_src)
|
||||
{
|
||||
regs.renamedSrcIdx(idx, renamed_src);
|
||||
renamedSrcIdx(idx, renamed_src);
|
||||
}
|
||||
|
||||
/** Dumps out contents of this BaseDynInst. */
|
||||
@@ -725,10 +681,10 @@ class DynInst : public ExecContext, public RefCounted
|
||||
}
|
||||
|
||||
/** Returns the number of source registers. */
|
||||
size_t numSrcRegs() const { return regs.numSrcs(); }
|
||||
size_t numSrcRegs() const { return numSrcs(); }
|
||||
|
||||
/** Returns the number of destination registers. */
|
||||
size_t numDestRegs() const { return regs.numDests(); }
|
||||
size_t numDestRegs() const { return numDests(); }
|
||||
|
||||
// the following are used to track physical register usage
|
||||
// for machines with separate int & FP reg files
|
||||
@@ -1143,7 +1099,7 @@ class DynInst : public ExecContext, public RefCounted
|
||||
{
|
||||
|
||||
for (int idx = 0; idx < numDestRegs(); idx++) {
|
||||
PhysRegIdPtr prev_phys_reg = regs.prevDestIdx(idx);
|
||||
PhysRegIdPtr prev_phys_reg = prevDestIdx(idx);
|
||||
const RegId& original_dest_reg = staticInst->destRegIdx(idx);
|
||||
switch (original_dest_reg.classValue()) {
|
||||
case IntRegClass:
|
||||
@@ -1198,19 +1154,19 @@ class DynInst : public ExecContext, public RefCounted
|
||||
RegVal
|
||||
readIntRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return cpu->readIntReg(regs.renamedSrcIdx(idx));
|
||||
return cpu->readIntReg(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
RegVal
|
||||
readFloatRegOperandBits(const StaticInst *si, int idx) override
|
||||
{
|
||||
return cpu->readFloatReg(regs.renamedSrcIdx(idx));
|
||||
return cpu->readFloatReg(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
const TheISA::VecRegContainer&
|
||||
readVecRegOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->readVecReg(regs.renamedSrcIdx(idx));
|
||||
return cpu->readVecReg(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1219,31 +1175,31 @@ class DynInst : public ExecContext, public RefCounted
|
||||
TheISA::VecRegContainer&
|
||||
getWritableVecRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return cpu->getWritableVecReg(regs.renamedDestIdx(idx));
|
||||
return cpu->getWritableVecReg(renamedDestIdx(idx));
|
||||
}
|
||||
|
||||
RegVal
|
||||
readVecElemOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->readVecElem(regs.renamedSrcIdx(idx));
|
||||
return cpu->readVecElem(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
const TheISA::VecPredRegContainer&
|
||||
readVecPredRegOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->readVecPredReg(regs.renamedSrcIdx(idx));
|
||||
return cpu->readVecPredReg(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
TheISA::VecPredRegContainer&
|
||||
getWritableVecPredRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return cpu->getWritableVecPredReg(regs.renamedDestIdx(idx));
|
||||
return cpu->getWritableVecPredReg(renamedDestIdx(idx));
|
||||
}
|
||||
|
||||
RegVal
|
||||
readCCRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return cpu->readCCReg(regs.renamedSrcIdx(idx));
|
||||
return cpu->readCCReg(renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** @todo: Make results into arrays so they can handle multiple dest
|
||||
@@ -1252,14 +1208,14 @@ class DynInst : public ExecContext, public RefCounted
|
||||
void
|
||||
setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
cpu->setIntReg(regs.renamedDestIdx(idx), val);
|
||||
cpu->setIntReg(renamedDestIdx(idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
|
||||
void
|
||||
setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
cpu->setFloatReg(regs.renamedDestIdx(idx), val);
|
||||
cpu->setFloatReg(renamedDestIdx(idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
|
||||
@@ -1267,7 +1223,7 @@ class DynInst : public ExecContext, public RefCounted
|
||||
setVecRegOperand(const StaticInst *si, int idx,
|
||||
const TheISA::VecRegContainer& val) override
|
||||
{
|
||||
cpu->setVecReg(regs.renamedDestIdx(idx), val);
|
||||
cpu->setVecReg(renamedDestIdx(idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
|
||||
@@ -1275,7 +1231,7 @@ class DynInst : public ExecContext, public RefCounted
|
||||
setVecElemOperand(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
int reg_idx = idx;
|
||||
cpu->setVecElem(regs.renamedDestIdx(reg_idx), val);
|
||||
cpu->setVecElem(renamedDestIdx(reg_idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
|
||||
@@ -1283,14 +1239,14 @@ class DynInst : public ExecContext, public RefCounted
|
||||
setVecPredRegOperand(const StaticInst *si, int idx,
|
||||
const TheISA::VecPredRegContainer& val) override
|
||||
{
|
||||
cpu->setVecPredReg(regs.renamedDestIdx(idx), val);
|
||||
cpu->setVecPredReg(renamedDestIdx(idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
|
||||
void
|
||||
setCCRegOperand(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
cpu->setCCReg(regs.renamedDestIdx(idx), val);
|
||||
cpu->setCCReg(renamedDestIdx(idx), val);
|
||||
setResult(val);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1048,9 +1048,13 @@ Fetch::buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
// Get a sequence number.
|
||||
InstSeqNum seq = cpu->getAndIncrementInstSeq();
|
||||
|
||||
DynInst::Arrays arrays;
|
||||
arrays.numSrcs = staticInst->numSrcRegs();
|
||||
arrays.numDests = staticInst->numDestRegs();
|
||||
|
||||
// Create a new DynInst from the instruction fetched.
|
||||
DynInstPtr instruction =
|
||||
new DynInst(staticInst, curMacroop, this_pc, next_pc, seq, cpu);
|
||||
DynInstPtr instruction = new (arrays) DynInst(
|
||||
arrays, staticInst, curMacroop, this_pc, next_pc, seq, cpu);
|
||||
instruction->setTid(tid);
|
||||
|
||||
instruction->setThreadState(cpu->thread[tid]);
|
||||
|
||||
@@ -1410,12 +1410,12 @@ IEW::writebackInsts()
|
||||
|
||||
for (int i = 0; i < inst->numDestRegs(); i++) {
|
||||
// Mark register as ready if not pinned
|
||||
if (inst->regs.renamedDestIdx(i)->
|
||||
if (inst->renamedDestIdx(i)->
|
||||
getNumPinnedWritesToComplete() == 0) {
|
||||
DPRINTF(IEW,"Setting Destination Register %i (%s)\n",
|
||||
inst->regs.renamedDestIdx(i)->index(),
|
||||
inst->regs.renamedDestIdx(i)->className());
|
||||
scoreboard->setReg(inst->regs.renamedDestIdx(i));
|
||||
inst->renamedDestIdx(i)->index(),
|
||||
inst->renamedDestIdx(i)->className());
|
||||
scoreboard->setReg(inst->renamedDestIdx(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1002,7 +1002,7 @@ InstructionQueue::wakeDependents(const DynInstPtr &completed_inst)
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg =
|
||||
completed_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
completed_inst->renamedDestIdx(dest_reg_idx);
|
||||
|
||||
// Special case of uniq or control registers. They are not
|
||||
// handled by the IQ and thus have no dependency graph entry.
|
||||
@@ -1234,7 +1234,7 @@ InstructionQueue::doSquash(ThreadID tid)
|
||||
src_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr src_reg =
|
||||
squashed_inst->regs.renamedSrcIdx(src_reg_idx);
|
||||
squashed_inst->renamedSrcIdx(src_reg_idx);
|
||||
|
||||
// Only remove it from the dependency graph if it
|
||||
// was placed there in the first place.
|
||||
@@ -1245,7 +1245,7 @@ InstructionQueue::doSquash(ThreadID tid)
|
||||
// overwritten. The only downside to this is it
|
||||
// leaves more room for error.
|
||||
|
||||
if (!squashed_inst->regs.readySrcIdx(src_reg_idx) &&
|
||||
if (!squashed_inst->readySrcIdx(src_reg_idx) &&
|
||||
!src_reg->isFixedMapping()) {
|
||||
dependGraph.remove(src_reg->flatIndex(),
|
||||
squashed_inst);
|
||||
@@ -1307,7 +1307,7 @@ InstructionQueue::doSquash(ThreadID tid)
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg =
|
||||
squashed_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
squashed_inst->renamedDestIdx(dest_reg_idx);
|
||||
if (dest_reg->isFixedMapping()){
|
||||
continue;
|
||||
}
|
||||
@@ -1339,8 +1339,8 @@ InstructionQueue::addToDependents(const DynInstPtr &new_inst)
|
||||
src_reg_idx++)
|
||||
{
|
||||
// Only add it to the dependency graph if it's not ready.
|
||||
if (!new_inst->regs.readySrcIdx(src_reg_idx)) {
|
||||
PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx);
|
||||
if (!new_inst->readySrcIdx(src_reg_idx)) {
|
||||
PhysRegIdPtr src_reg = new_inst->renamedSrcIdx(src_reg_idx);
|
||||
|
||||
// Check the IQ's scoreboard to make sure the register
|
||||
// hasn't become ready while the instruction was in flight
|
||||
@@ -1386,7 +1386,7 @@ InstructionQueue::addToProducers(const DynInstPtr &new_inst)
|
||||
dest_reg_idx < total_dest_regs;
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
PhysRegIdPtr dest_reg = new_inst->renamedDestIdx(dest_reg_idx);
|
||||
|
||||
// Some registers have fixed mapping, and there is no need to track
|
||||
// dependencies as these instructions must be executed at commit.
|
||||
|
||||
@@ -254,7 +254,7 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
|
||||
if (!src_reg.is(MiscRegClass) &&
|
||||
!(src_reg.is(IntRegClass) && src_reg.index() == zeroReg)) {
|
||||
// Get the physical register index of the i'th source register.
|
||||
PhysRegIdPtr phys_src_reg = dyn_inst->regs.renamedSrcIdx(src_idx);
|
||||
PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
|
||||
DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
|
||||
" %i (%s)\n", seq_num,
|
||||
phys_src_reg->flatIndex(), phys_src_reg->className());
|
||||
@@ -288,7 +288,7 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
|
||||
// Get the physical register index of the i'th destination
|
||||
// register.
|
||||
PhysRegIdPtr phys_dest_reg =
|
||||
dyn_inst->regs.renamedDestIdx(dest_idx);
|
||||
dyn_inst->renamedDestIdx(dest_idx);
|
||||
DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
|
||||
" %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
|
||||
dest_reg.className());
|
||||
|
||||
@@ -1089,7 +1089,7 @@ Rename::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
|
||||
|
||||
rename_result = map->rename(flat_dest_regid);
|
||||
|
||||
inst->regs.flattenedDestIdx(dest_idx, flat_dest_regid);
|
||||
inst->flattenedDestIdx(dest_idx, flat_dest_regid);
|
||||
|
||||
scoreboard->unsetReg(rename_result.first);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user