cpu: Replace fixed sized arrays in the O3 inst with variable arrays.
The only way to allocate fixed-size arrays which will definitely be big enough for all source/destination registers for a given instruction is to track the maximum number of each at compile time, and then size the arrays appropriately. That creates a point of centralization which prevents breaking up decoder and instruction definitions into more modular pieces and, if multiple ISAs are ever built at once, would require coordination between all ISAs and waste memory for most of them. The dynamic allocation overhead is minimized by allocating the storage for all variable arrays in one chunk, and then placing the arrays there using placement new. There is still some overhead, although less than it might be otherwise. Change-Id: Id2c42869cba944deb97da01ca9e0e70186e22532 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38384 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -43,6 +43,7 @@
|
||||
#ifndef __CPU_BASE_DYN_INST_HH__
|
||||
#define __CPU_BASE_DYN_INST_HH__
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <deque>
|
||||
@@ -91,11 +92,6 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
// The list of instructions iterator type.
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
enum {
|
||||
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs
|
||||
MaxInstDestRegs = TheISA::MaxInstDestRegs /// Max dest regs
|
||||
};
|
||||
|
||||
protected:
|
||||
enum Status {
|
||||
IqEntry, /// Instruction is in the IQ
|
||||
@@ -182,12 +178,168 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
std::bitset<NumStatus> status;
|
||||
|
||||
protected:
|
||||
/** Whether or not the source register is ready.
|
||||
* @todo: Not sure this should be here vs the derived class.
|
||||
/**
|
||||
* Collect register related information into a single struct. The number of
|
||||
* source and destination registers can vary, and storage for information
|
||||
* about them needs to be allocated dynamically. This class figures out
|
||||
* how much space is needed and allocates it all at once, and then
|
||||
* trivially divvies it up for each type of per-register array.
|
||||
*/
|
||||
std::bitset<MaxInstSrcRegs> _readySrcRegIdx;
|
||||
struct Regs
|
||||
{
|
||||
private:
|
||||
size_t _numSrcs;
|
||||
size_t _numDests;
|
||||
|
||||
size_t srcsReady = 0;
|
||||
|
||||
using BackingStorePtr = std::unique_ptr<uint8_t[]>;
|
||||
using BufCursor = BackingStorePtr::pointer;
|
||||
|
||||
BackingStorePtr buf;
|
||||
|
||||
// Members should be ordered based on required alignment so that they
|
||||
// can be allocated contiguously.
|
||||
|
||||
// Flattened register index of the destination registers of this
|
||||
// instruction.
|
||||
RegId *_flatDestIdx;
|
||||
|
||||
// Physical register index of the destination registers of this
|
||||
// instruction.
|
||||
PhysRegIdPtr *_destIdx;
|
||||
|
||||
// Physical register index of the previous producers of the
|
||||
// architected destinations.
|
||||
PhysRegIdPtr *_prevDestIdx;
|
||||
|
||||
static inline size_t
|
||||
bytesForDests(size_t num)
|
||||
{
|
||||
return (sizeof(RegId) + 2 * sizeof(PhysRegIdPtr)) * num;
|
||||
}
|
||||
|
||||
// Physical register index of the source registers of this instruction.
|
||||
PhysRegIdPtr *_srcIdx;
|
||||
|
||||
// Whether or not the source register is ready, one bit per register.
|
||||
uint8_t *_readySrcIdx;
|
||||
|
||||
static inline size_t
|
||||
bytesForSources(size_t num)
|
||||
{
|
||||
return sizeof(PhysRegIdPtr) * num +
|
||||
sizeof(uint8_t) * ((num + 7) / 8);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static inline void
|
||||
allocate(T *&ptr, BufCursor &cur, size_t count)
|
||||
{
|
||||
ptr = new (cur) T[count];
|
||||
cur += sizeof(T) * count;
|
||||
}
|
||||
|
||||
public:
|
||||
size_t numSrcs() const { return _numSrcs; }
|
||||
size_t numDests() const { return _numDests; }
|
||||
|
||||
void
|
||||
init()
|
||||
{
|
||||
std::fill(_readySrcIdx, _readySrcIdx + (numSrcs() + 7) / 8, 0);
|
||||
}
|
||||
|
||||
Regs(size_t srcs, size_t dests) : _numSrcs(srcs), _numDests(dests),
|
||||
buf(new uint8_t[bytesForSources(srcs) + bytesForDests(dests)])
|
||||
{
|
||||
BufCursor cur = buf.get();
|
||||
allocate(_flatDestIdx, cur, dests);
|
||||
allocate(_destIdx, cur, dests);
|
||||
allocate(_prevDestIdx, cur, dests);
|
||||
allocate(_srcIdx, cur, srcs);
|
||||
allocate(_readySrcIdx, cur, (srcs + 7) / 8);
|
||||
|
||||
init();
|
||||
}
|
||||
|
||||
// Returns the flattened register index of the idx'th destination
|
||||
// register.
|
||||
const RegId &
|
||||
flattenedDestIdx(int idx) const
|
||||
{
|
||||
return _flatDestIdx[idx];
|
||||
}
|
||||
|
||||
// Flattens a destination architectural register index into a logical
|
||||
// index.
|
||||
void
|
||||
flattenedDestIdx(int idx, const RegId ®_id)
|
||||
{
|
||||
_flatDestIdx[idx] = reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the idx'th destination
|
||||
// register.
|
||||
PhysRegIdPtr
|
||||
renamedDestIdx(int idx) const
|
||||
{
|
||||
return _destIdx[idx];
|
||||
}
|
||||
|
||||
// Set the renamed dest register id.
|
||||
void
|
||||
renamedDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_destIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the previous physical
|
||||
// register that remapped to the same logical register index.
|
||||
PhysRegIdPtr
|
||||
prevDestIdx(int idx) const
|
||||
{
|
||||
return _prevDestIdx[idx];
|
||||
}
|
||||
|
||||
// Set the previous renamed dest register id.
|
||||
void
|
||||
prevDestIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_prevDestIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
// Returns the physical register index of the i'th source register.
|
||||
PhysRegIdPtr
|
||||
renamedSrcIdx(int idx) const
|
||||
{
|
||||
return _srcIdx[idx];
|
||||
}
|
||||
|
||||
void
|
||||
renamedSrcIdx(int idx, PhysRegIdPtr phys_reg_id)
|
||||
{
|
||||
_srcIdx[idx] = phys_reg_id;
|
||||
}
|
||||
|
||||
bool
|
||||
readySrcIdx(int idx) const
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
return bits(byte, idx % 8);
|
||||
}
|
||||
|
||||
void
|
||||
readySrcIdx(int idx, bool ready)
|
||||
{
|
||||
uint8_t &byte = _readySrcIdx[idx / 8];
|
||||
replaceBits(byte, idx % 8, ready ? 1 : 0);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
Regs regs;
|
||||
|
||||
/** The thread this instruction is from. */
|
||||
ThreadID threadNumber;
|
||||
|
||||
@@ -246,28 +398,6 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
uint64_t htmUid;
|
||||
uint64_t htmDepth;
|
||||
|
||||
protected:
|
||||
/** Flattened register index of the destination registers of this
|
||||
* instruction.
|
||||
*/
|
||||
std::array<RegId, TheISA::MaxInstDestRegs> _flatDestRegIdx;
|
||||
|
||||
/** Physical register index of the destination registers of this
|
||||
* instruction.
|
||||
*/
|
||||
std::array<PhysRegIdPtr, TheISA::MaxInstDestRegs> _destRegIdx;
|
||||
|
||||
/** Physical register index of the source registers of this
|
||||
* instruction.
|
||||
*/
|
||||
std::array<PhysRegIdPtr, TheISA::MaxInstSrcRegs> _srcRegIdx;
|
||||
|
||||
/** Physical register index of the previous producers of the
|
||||
* architected destinations.
|
||||
*/
|
||||
std::array<PhysRegIdPtr, TheISA::MaxInstDestRegs> _prevDestRegIdx;
|
||||
|
||||
|
||||
public:
|
||||
/** Records changes to result? */
|
||||
void recordResult(bool f) { instFlags[RecordResult] = f; }
|
||||
@@ -354,41 +484,6 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
void dumpSNList();
|
||||
#endif
|
||||
|
||||
/** Returns the physical register index of the i'th destination
|
||||
* register.
|
||||
*/
|
||||
PhysRegIdPtr
|
||||
renamedDestRegIdx(int idx) const
|
||||
{
|
||||
return _destRegIdx[idx];
|
||||
}
|
||||
|
||||
/** Returns the physical register index of the i'th source register. */
|
||||
PhysRegIdPtr
|
||||
renamedSrcRegIdx(int idx) const
|
||||
{
|
||||
assert(TheISA::MaxInstSrcRegs > idx);
|
||||
return _srcRegIdx[idx];
|
||||
}
|
||||
|
||||
/** Returns the flattened register index of the i'th destination
|
||||
* register.
|
||||
*/
|
||||
const RegId &
|
||||
flattenedDestRegIdx(int idx) const
|
||||
{
|
||||
return _flatDestRegIdx[idx];
|
||||
}
|
||||
|
||||
/** Returns the physical register index of the previous physical register
|
||||
* that remapped to the same logical register index.
|
||||
*/
|
||||
PhysRegIdPtr
|
||||
prevDestRegIdx(int idx) const
|
||||
{
|
||||
return _prevDestRegIdx[idx];
|
||||
}
|
||||
|
||||
/** Renames a destination register to a physical register. Also records
|
||||
* the previous physical register that the logical register mapped to.
|
||||
*/
|
||||
@@ -396,8 +491,8 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
renameDestReg(int idx, PhysRegIdPtr renamed_dest,
|
||||
PhysRegIdPtr previous_rename)
|
||||
{
|
||||
_destRegIdx[idx] = renamed_dest;
|
||||
_prevDestRegIdx[idx] = previous_rename;
|
||||
regs.renamedDestIdx(idx, renamed_dest);
|
||||
regs.prevDestIdx(idx, previous_rename);
|
||||
if (renamed_dest->isPinned())
|
||||
setPinnedRegsRenamed();
|
||||
}
|
||||
@@ -409,17 +504,9 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
void
|
||||
renameSrcReg(int idx, PhysRegIdPtr renamed_src)
|
||||
{
|
||||
_srcRegIdx[idx] = renamed_src;
|
||||
regs.renamedSrcIdx(idx, renamed_src);
|
||||
}
|
||||
|
||||
/** Flattens a destination architectural register index into a logical
|
||||
* index.
|
||||
*/
|
||||
void
|
||||
flattenDestReg(int idx, const RegId &flattened_dest)
|
||||
{
|
||||
_flatDestRegIdx[idx] = flattened_dest;
|
||||
}
|
||||
/** BaseDynInst constructor given a binary instruction.
|
||||
* @param staticInst A StaticInstPtr to the underlying instruction.
|
||||
* @param pc The PC state for the instruction.
|
||||
@@ -646,10 +733,10 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
{ return staticInst->branchTarget(pc); }
|
||||
|
||||
/** Returns the number of source registers. */
|
||||
int8_t numSrcRegs() const { return staticInst->numSrcRegs(); }
|
||||
size_t numSrcRegs() const { return regs.numSrcs(); }
|
||||
|
||||
/** Returns the number of destination registers. */
|
||||
int8_t numDestRegs() const { return staticInst->numDestRegs(); }
|
||||
size_t numDestRegs() const { return regs.numDests(); }
|
||||
|
||||
// the following are used to track physical register usage
|
||||
// for machines with separate int & FP reg files
|
||||
@@ -789,13 +876,6 @@ class BaseDynInst : public ExecContext, public RefCounted
|
||||
/** Marks a specific register as ready. */
|
||||
void markSrcRegReady(RegIndex src_idx);
|
||||
|
||||
/** Returns if a source register is ready. */
|
||||
bool
|
||||
isReadySrcRegIdx(int idx) const
|
||||
{
|
||||
return this->_readySrcRegIdx[idx];
|
||||
}
|
||||
|
||||
/** Sets this instruction as completed. */
|
||||
void setCompleted() { status.set(Completed); }
|
||||
|
||||
|
||||
@@ -64,6 +64,7 @@ BaseDynInst<Impl>::BaseDynInst(const StaticInstPtr &_staticInst,
|
||||
: staticInst(_staticInst), cpu(cpu),
|
||||
thread(nullptr),
|
||||
traceData(nullptr),
|
||||
regs(staticInst->numSrcRegs(), staticInst->numDestRegs()),
|
||||
macroop(_macroop),
|
||||
memData(nullptr),
|
||||
savedReq(nullptr),
|
||||
@@ -80,7 +81,9 @@ BaseDynInst<Impl>::BaseDynInst(const StaticInstPtr &_staticInst,
|
||||
template <class Impl>
|
||||
BaseDynInst<Impl>::BaseDynInst(const StaticInstPtr &_staticInst,
|
||||
const StaticInstPtr &_macroop)
|
||||
: staticInst(_staticInst), traceData(NULL), macroop(_macroop)
|
||||
: staticInst(_staticInst), traceData(NULL),
|
||||
regs(staticInst->numSrcRegs(), staticInst->numDestRegs()),
|
||||
macroop(_macroop)
|
||||
{
|
||||
seqNum = 0;
|
||||
initVars();
|
||||
@@ -214,8 +217,7 @@ template <class Impl>
|
||||
void
|
||||
BaseDynInst<Impl>::markSrcRegReady(RegIndex src_idx)
|
||||
{
|
||||
_readySrcRegIdx[src_idx] = true;
|
||||
|
||||
regs.readySrcIdx(src_idx, true);
|
||||
markSrcRegReady();
|
||||
}
|
||||
|
||||
@@ -228,7 +230,7 @@ BaseDynInst<Impl>::eaSrcsReady() const
|
||||
// stored)
|
||||
|
||||
for (int i = 1; i < numSrcRegs(); ++i) {
|
||||
if (!_readySrcRegIdx[i])
|
||||
if (!regs.readySrcIdx(i))
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -253,7 +255,7 @@ BaseDynInst<Impl>::setSquashed()
|
||||
// ensures that dest regs will be pinned to the same phys register if
|
||||
// re-rename happens.
|
||||
for (int idx = 0; idx < numDestRegs(); idx++) {
|
||||
PhysRegIdPtr phys_dest_reg = renamedDestRegIdx(idx);
|
||||
PhysRegIdPtr phys_dest_reg = regs.renamedDestIdx(idx);
|
||||
if (phys_dest_reg->isPinned()) {
|
||||
phys_dest_reg->incrNumPinnedWrites();
|
||||
if (isPinnedRegsWritten())
|
||||
|
||||
@@ -1342,8 +1342,8 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
|
||||
|
||||
// Update the commit rename map
|
||||
for (int i = 0; i < head_inst->numDestRegs(); i++) {
|
||||
renameMap[tid]->setEntry(head_inst->flattenedDestRegIdx(i),
|
||||
head_inst->renamedDestRegIdx(i));
|
||||
renameMap[tid]->setEntry(head_inst->regs.flattenedDestIdx(i),
|
||||
head_inst->regs.renamedDestIdx(i));
|
||||
}
|
||||
|
||||
// hardware transactional memory
|
||||
|
||||
@@ -63,11 +63,6 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
/** Register types. */
|
||||
static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
|
||||
|
||||
enum {
|
||||
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
|
||||
MaxInstDestRegs = TheISA::MaxInstDestRegs //< Max dest regs
|
||||
};
|
||||
|
||||
public:
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
|
||||
@@ -96,8 +91,6 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
protected:
|
||||
/** Explicitation of dependent names. */
|
||||
using BaseDynInst<Impl>::cpu;
|
||||
using BaseDynInst<Impl>::_srcRegIdx;
|
||||
using BaseDynInst<Impl>::_destRegIdx;
|
||||
|
||||
/** Values to be written to the destination misc. registers. */
|
||||
std::array<RegVal, TheISA::MaxMiscDestRegs> _destMiscRegVal;
|
||||
@@ -203,7 +196,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
{
|
||||
|
||||
for (int idx = 0; idx < this->numDestRegs(); idx++) {
|
||||
PhysRegIdPtr prev_phys_reg = this->prevDestRegIdx(idx);
|
||||
PhysRegIdPtr prev_phys_reg = this->regs.prevDestIdx(idx);
|
||||
const RegId& original_dest_reg =
|
||||
this->staticInst->destRegIdx(idx);
|
||||
switch (original_dest_reg.classValue()) {
|
||||
@@ -259,19 +252,19 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
RegVal
|
||||
readIntRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return this->cpu->readIntReg(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readIntReg(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
RegVal
|
||||
readFloatRegOperandBits(const StaticInst *si, int idx) override
|
||||
{
|
||||
return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readFloatReg(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
const TheISA::VecRegContainer&
|
||||
readVecRegOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return this->cpu->readVecReg(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readVecReg(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -280,7 +273,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
TheISA::VecRegContainer&
|
||||
getWritableVecRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return this->cpu->getWritableVecReg(this->_destRegIdx[idx]);
|
||||
return this->cpu->getWritableVecReg(this->regs.renamedDestIdx(idx));
|
||||
}
|
||||
|
||||
/** Vector Register Lane Interfaces. */
|
||||
@@ -289,28 +282,32 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
ConstVecLane8
|
||||
readVec8BitLaneOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->template readVecLane<uint8_t>(_srcRegIdx[idx]);
|
||||
return cpu->template readVecLane<uint8_t>(
|
||||
this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** Reads source vector 16bit operand. */
|
||||
ConstVecLane16
|
||||
readVec16BitLaneOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->template readVecLane<uint16_t>(_srcRegIdx[idx]);
|
||||
return cpu->template readVecLane<uint16_t>(
|
||||
this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** Reads source vector 32bit operand. */
|
||||
ConstVecLane32
|
||||
readVec32BitLaneOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->template readVecLane<uint32_t>(_srcRegIdx[idx]);
|
||||
return cpu->template readVecLane<uint32_t>(
|
||||
this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** Reads source vector 64bit operand. */
|
||||
ConstVecLane64
|
||||
readVec64BitLaneOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return cpu->template readVecLane<uint64_t>(_srcRegIdx[idx]);
|
||||
return cpu->template readVecLane<uint64_t>(
|
||||
this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** Write a lane of the destination vector operand. */
|
||||
@@ -318,7 +315,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
void
|
||||
setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
|
||||
{
|
||||
return cpu->template setVecLane(_destRegIdx[idx], val);
|
||||
return cpu->template setVecLane(this->regs.renamedDestIdx(idx), val);
|
||||
}
|
||||
virtual void
|
||||
setVecLaneOperand(const StaticInst *si, int idx,
|
||||
@@ -349,25 +346,26 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
TheISA::VecElem
|
||||
readVecElemOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return this->cpu->readVecElem(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readVecElem(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
const TheISA::VecPredRegContainer&
|
||||
readVecPredRegOperand(const StaticInst *si, int idx) const override
|
||||
{
|
||||
return this->cpu->readVecPredReg(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readVecPredReg(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
TheISA::VecPredRegContainer&
|
||||
getWritableVecPredRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]);
|
||||
return this->cpu->getWritableVecPredReg(
|
||||
this->regs.renamedDestIdx(idx));
|
||||
}
|
||||
|
||||
RegVal
|
||||
readCCRegOperand(const StaticInst *si, int idx) override
|
||||
{
|
||||
return this->cpu->readCCReg(this->_srcRegIdx[idx]);
|
||||
return this->cpu->readCCReg(this->regs.renamedSrcIdx(idx));
|
||||
}
|
||||
|
||||
/** @todo: Make results into arrays so they can handle multiple dest
|
||||
@@ -376,14 +374,14 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
void
|
||||
setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
this->cpu->setIntReg(this->_destRegIdx[idx], val);
|
||||
this->cpu->setIntReg(this->regs.renamedDestIdx(idx), val);
|
||||
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
|
||||
}
|
||||
|
||||
void
|
||||
setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
this->cpu->setFloatReg(this->_destRegIdx[idx], val);
|
||||
this->cpu->setFloatReg(this->regs.renamedDestIdx(idx), val);
|
||||
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
|
||||
}
|
||||
|
||||
@@ -391,7 +389,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
setVecRegOperand(const StaticInst *si, int idx,
|
||||
const TheISA::VecRegContainer& val) override
|
||||
{
|
||||
this->cpu->setVecReg(this->_destRegIdx[idx], val);
|
||||
this->cpu->setVecReg(this->regs.renamedDestIdx(idx), val);
|
||||
BaseDynInst<Impl>::setVecRegOperand(si, idx, val);
|
||||
}
|
||||
|
||||
@@ -400,7 +398,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
const TheISA::VecElem val) override
|
||||
{
|
||||
int reg_idx = idx;
|
||||
this->cpu->setVecElem(this->_destRegIdx[reg_idx], val);
|
||||
this->cpu->setVecElem(this->regs.renamedDestIdx(reg_idx), val);
|
||||
BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
|
||||
}
|
||||
|
||||
@@ -408,13 +406,13 @@ class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
setVecPredRegOperand(const StaticInst *si, int idx,
|
||||
const TheISA::VecPredRegContainer& val) override
|
||||
{
|
||||
this->cpu->setVecPredReg(this->_destRegIdx[idx], val);
|
||||
this->cpu->setVecPredReg(this->regs.renamedDestIdx(idx), val);
|
||||
BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val);
|
||||
}
|
||||
|
||||
void setCCRegOperand(const StaticInst *si, int idx, RegVal val) override
|
||||
{
|
||||
this->cpu->setCCReg(this->_destRegIdx[idx], val);
|
||||
this->cpu->setCCReg(this->regs.renamedDestIdx(idx), val);
|
||||
BaseDynInst<Impl>::setCCRegOperand(si, idx, val);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -41,6 +41,8 @@
|
||||
#ifndef __CPU_O3_DYN_INST_IMPL_HH__
|
||||
#define __CPU_O3_DYN_INST_IMPL_HH__
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "debug/O3PipeView.hh"
|
||||
|
||||
@@ -102,7 +104,7 @@ template <class Impl>
|
||||
void
|
||||
BaseO3DynInst<Impl>::initVars()
|
||||
{
|
||||
this->_readySrcRegIdx.reset();
|
||||
this->regs.init();
|
||||
|
||||
_numDestMiscRegs = 0;
|
||||
|
||||
|
||||
@@ -1436,12 +1436,12 @@ DefaultIEW<Impl>::writebackInsts()
|
||||
|
||||
for (int i = 0; i < inst->numDestRegs(); i++) {
|
||||
// Mark register as ready if not pinned
|
||||
if (inst->renamedDestRegIdx(i)->
|
||||
if (inst->regs.renamedDestIdx(i)->
|
||||
getNumPinnedWritesToComplete() == 0) {
|
||||
DPRINTF(IEW,"Setting Destination Register %i (%s)\n",
|
||||
inst->renamedDestRegIdx(i)->index(),
|
||||
inst->renamedDestRegIdx(i)->className());
|
||||
scoreboard->setReg(inst->renamedDestRegIdx(i));
|
||||
inst->regs.renamedDestIdx(i)->index(),
|
||||
inst->regs.renamedDestIdx(i)->className());
|
||||
scoreboard->setReg(inst->regs.renamedDestIdx(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1004,7 +1004,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg =
|
||||
completed_inst->renamedDestRegIdx(dest_reg_idx);
|
||||
completed_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
|
||||
// Special case of uniq or control registers. They are not
|
||||
// handled by the IQ and thus have no dependency graph entry.
|
||||
@@ -1242,7 +1242,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
src_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr src_reg =
|
||||
squashed_inst->renamedSrcRegIdx(src_reg_idx);
|
||||
squashed_inst->regs.renamedSrcIdx(src_reg_idx);
|
||||
|
||||
// Only remove it from the dependency graph if it
|
||||
// was placed there in the first place.
|
||||
@@ -1253,7 +1253,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
// overwritten. The only downside to this is it
|
||||
// leaves more room for error.
|
||||
|
||||
if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
|
||||
if (!squashed_inst->regs.readySrcIdx(src_reg_idx) &&
|
||||
!src_reg->isFixedMapping()) {
|
||||
dependGraph.remove(src_reg->flatIndex(),
|
||||
squashed_inst);
|
||||
@@ -1315,7 +1315,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg =
|
||||
squashed_inst->renamedDestRegIdx(dest_reg_idx);
|
||||
squashed_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
if (dest_reg->isFixedMapping()){
|
||||
continue;
|
||||
}
|
||||
@@ -1341,8 +1341,8 @@ InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
|
||||
src_reg_idx++)
|
||||
{
|
||||
// Only add it to the dependency graph if it's not ready.
|
||||
if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
|
||||
PhysRegIdPtr src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
|
||||
if (!new_inst->regs.readySrcIdx(src_reg_idx)) {
|
||||
PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx);
|
||||
|
||||
// Check the IQ's scoreboard to make sure the register
|
||||
// hasn't become ready while the instruction was in flight
|
||||
@@ -1389,7 +1389,7 @@ InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
|
||||
dest_reg_idx < total_dest_regs;
|
||||
dest_reg_idx++)
|
||||
{
|
||||
PhysRegIdPtr dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
|
||||
PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx);
|
||||
|
||||
// Some registers have fixed mapping, and there is no need to track
|
||||
// dependencies as these instructions must be executed at commit.
|
||||
|
||||
@@ -243,7 +243,7 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
|
||||
if (!src_reg.isMiscReg() &&
|
||||
!src_reg.isZeroReg()) {
|
||||
// Get the physical register index of the i'th source register.
|
||||
PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
|
||||
PhysRegIdPtr phys_src_reg = dyn_inst->regs.renamedSrcIdx(src_idx);
|
||||
DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
|
||||
" %i (%s)\n", seq_num,
|
||||
phys_src_reg->flatIndex(), phys_src_reg->className());
|
||||
@@ -276,7 +276,8 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
|
||||
!dest_reg.isZeroReg()) {
|
||||
// Get the physical register index of the i'th destination
|
||||
// register.
|
||||
PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
|
||||
PhysRegIdPtr phys_dest_reg =
|
||||
dyn_inst->regs.renamedDestIdx(dest_idx);
|
||||
DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
|
||||
" %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
|
||||
dest_reg.className());
|
||||
|
||||
@@ -1108,7 +1108,7 @@ DefaultRename<Impl>::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
|
||||
|
||||
rename_result = map->rename(flat_dest_regid);
|
||||
|
||||
inst->flattenDestReg(dest_idx, flat_dest_regid);
|
||||
inst->regs.flattenedDestIdx(dest_idx, flat_dest_regid);
|
||||
|
||||
scoreboard->unsetReg(rename_result.first);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user