cpu: Replace fixed-size arrays in the O3 inst with variable arrays.

The only way to allocate fixed-size arrays which will definitely be big
enough for all source/destination registers of a given instruction is
to track the maximum number of each at compile time, and then size the
arrays appropriately. That creates a point of centralization which
prevents breaking up decoder and instruction definitions into more
modular pieces and which, if multiple ISAs are ever built at once, would
require coordination between all ISAs and waste memory for most of
them.

The dynamic allocation overhead is minimized by allocating the storage
for all variable arrays in one chunk, and then placing the arrays there
using placement new. There is still some overhead, although less than it
might be otherwise.

Change-Id: Id2c42869cba944deb97da01ca9e0e70186e22532
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38384
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2020-12-07 04:41:45 -08:00
parent 57dd228cad
commit 344ea0330a
9 changed files with 217 additions and 134 deletions

View File

@@ -1342,8 +1342,8 @@ DefaultCommit<Impl>::commitHead(const DynInstPtr &head_inst, unsigned inst_num)
// Update the commit rename map
for (int i = 0; i < head_inst->numDestRegs(); i++) {
renameMap[tid]->setEntry(head_inst->flattenedDestRegIdx(i),
head_inst->renamedDestRegIdx(i));
renameMap[tid]->setEntry(head_inst->regs.flattenedDestIdx(i),
head_inst->regs.renamedDestIdx(i));
}
// hardware transactional memory

View File

@@ -63,11 +63,6 @@ class BaseO3DynInst : public BaseDynInst<Impl>
/** Register types. */
static constexpr auto NumVecElemPerVecReg = TheISA::NumVecElemPerVecReg;
enum {
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
MaxInstDestRegs = TheISA::MaxInstDestRegs //< Max dest regs
};
public:
/** BaseDynInst constructor given a binary instruction. */
BaseO3DynInst(const StaticInstPtr &staticInst, const StaticInstPtr
@@ -96,8 +91,6 @@ class BaseO3DynInst : public BaseDynInst<Impl>
protected:
/** Explicitation of dependent names. */
using BaseDynInst<Impl>::cpu;
using BaseDynInst<Impl>::_srcRegIdx;
using BaseDynInst<Impl>::_destRegIdx;
/** Values to be written to the destination misc. registers. */
std::array<RegVal, TheISA::MaxMiscDestRegs> _destMiscRegVal;
@@ -203,7 +196,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
{
for (int idx = 0; idx < this->numDestRegs(); idx++) {
PhysRegIdPtr prev_phys_reg = this->prevDestRegIdx(idx);
PhysRegIdPtr prev_phys_reg = this->regs.prevDestIdx(idx);
const RegId& original_dest_reg =
this->staticInst->destRegIdx(idx);
switch (original_dest_reg.classValue()) {
@@ -259,19 +252,19 @@ class BaseO3DynInst : public BaseDynInst<Impl>
RegVal
readIntRegOperand(const StaticInst *si, int idx) override
{
return this->cpu->readIntReg(this->_srcRegIdx[idx]);
return this->cpu->readIntReg(this->regs.renamedSrcIdx(idx));
}
RegVal
readFloatRegOperandBits(const StaticInst *si, int idx) override
{
return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
return this->cpu->readFloatReg(this->regs.renamedSrcIdx(idx));
}
const TheISA::VecRegContainer&
readVecRegOperand(const StaticInst *si, int idx) const override
{
return this->cpu->readVecReg(this->_srcRegIdx[idx]);
return this->cpu->readVecReg(this->regs.renamedSrcIdx(idx));
}
/**
@@ -280,7 +273,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
TheISA::VecRegContainer&
getWritableVecRegOperand(const StaticInst *si, int idx) override
{
return this->cpu->getWritableVecReg(this->_destRegIdx[idx]);
return this->cpu->getWritableVecReg(this->regs.renamedDestIdx(idx));
}
/** Vector Register Lane Interfaces. */
@@ -289,28 +282,32 @@ class BaseO3DynInst : public BaseDynInst<Impl>
ConstVecLane8
readVec8BitLaneOperand(const StaticInst *si, int idx) const override
{
return cpu->template readVecLane<uint8_t>(_srcRegIdx[idx]);
return cpu->template readVecLane<uint8_t>(
this->regs.renamedSrcIdx(idx));
}
/** Reads source vector 16bit operand. */
ConstVecLane16
readVec16BitLaneOperand(const StaticInst *si, int idx) const override
{
return cpu->template readVecLane<uint16_t>(_srcRegIdx[idx]);
return cpu->template readVecLane<uint16_t>(
this->regs.renamedSrcIdx(idx));
}
/** Reads source vector 32bit operand. */
ConstVecLane32
readVec32BitLaneOperand(const StaticInst *si, int idx) const override
{
return cpu->template readVecLane<uint32_t>(_srcRegIdx[idx]);
return cpu->template readVecLane<uint32_t>(
this->regs.renamedSrcIdx(idx));
}
/** Reads source vector 64bit operand. */
ConstVecLane64
readVec64BitLaneOperand(const StaticInst *si, int idx) const override
{
return cpu->template readVecLane<uint64_t>(_srcRegIdx[idx]);
return cpu->template readVecLane<uint64_t>(
this->regs.renamedSrcIdx(idx));
}
/** Write a lane of the destination vector operand. */
@@ -318,7 +315,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
void
setVecLaneOperandT(const StaticInst *si, int idx, const LD& val)
{
return cpu->template setVecLane(_destRegIdx[idx], val);
return cpu->template setVecLane(this->regs.renamedDestIdx(idx), val);
}
virtual void
setVecLaneOperand(const StaticInst *si, int idx,
@@ -349,25 +346,26 @@ class BaseO3DynInst : public BaseDynInst<Impl>
TheISA::VecElem
readVecElemOperand(const StaticInst *si, int idx) const override
{
return this->cpu->readVecElem(this->_srcRegIdx[idx]);
return this->cpu->readVecElem(this->regs.renamedSrcIdx(idx));
}
const TheISA::VecPredRegContainer&
readVecPredRegOperand(const StaticInst *si, int idx) const override
{
return this->cpu->readVecPredReg(this->_srcRegIdx[idx]);
return this->cpu->readVecPredReg(this->regs.renamedSrcIdx(idx));
}
TheISA::VecPredRegContainer&
getWritableVecPredRegOperand(const StaticInst *si, int idx) override
{
return this->cpu->getWritableVecPredReg(this->_destRegIdx[idx]);
return this->cpu->getWritableVecPredReg(
this->regs.renamedDestIdx(idx));
}
RegVal
readCCRegOperand(const StaticInst *si, int idx) override
{
return this->cpu->readCCReg(this->_srcRegIdx[idx]);
return this->cpu->readCCReg(this->regs.renamedSrcIdx(idx));
}
/** @todo: Make results into arrays so they can handle multiple dest
@@ -376,14 +374,14 @@ class BaseO3DynInst : public BaseDynInst<Impl>
void
setIntRegOperand(const StaticInst *si, int idx, RegVal val) override
{
this->cpu->setIntReg(this->_destRegIdx[idx], val);
this->cpu->setIntReg(this->regs.renamedDestIdx(idx), val);
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
}
void
setFloatRegOperandBits(const StaticInst *si, int idx, RegVal val) override
{
this->cpu->setFloatReg(this->_destRegIdx[idx], val);
this->cpu->setFloatReg(this->regs.renamedDestIdx(idx), val);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
@@ -391,7 +389,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
setVecRegOperand(const StaticInst *si, int idx,
const TheISA::VecRegContainer& val) override
{
this->cpu->setVecReg(this->_destRegIdx[idx], val);
this->cpu->setVecReg(this->regs.renamedDestIdx(idx), val);
BaseDynInst<Impl>::setVecRegOperand(si, idx, val);
}
@@ -400,7 +398,7 @@ class BaseO3DynInst : public BaseDynInst<Impl>
const TheISA::VecElem val) override
{
int reg_idx = idx;
this->cpu->setVecElem(this->_destRegIdx[reg_idx], val);
this->cpu->setVecElem(this->regs.renamedDestIdx(reg_idx), val);
BaseDynInst<Impl>::setVecElemOperand(si, idx, val);
}
@@ -408,13 +406,13 @@ class BaseO3DynInst : public BaseDynInst<Impl>
setVecPredRegOperand(const StaticInst *si, int idx,
const TheISA::VecPredRegContainer& val) override
{
this->cpu->setVecPredReg(this->_destRegIdx[idx], val);
this->cpu->setVecPredReg(this->regs.renamedDestIdx(idx), val);
BaseDynInst<Impl>::setVecPredRegOperand(si, idx, val);
}
void setCCRegOperand(const StaticInst *si, int idx, RegVal val) override
{
this->cpu->setCCReg(this->_destRegIdx[idx], val);
this->cpu->setCCReg(this->regs.renamedDestIdx(idx), val);
BaseDynInst<Impl>::setCCRegOperand(si, idx, val);
}
};

View File

@@ -41,6 +41,8 @@
#ifndef __CPU_O3_DYN_INST_IMPL_HH__
#define __CPU_O3_DYN_INST_IMPL_HH__
#include <algorithm>
#include "cpu/o3/dyn_inst.hh"
#include "debug/O3PipeView.hh"
@@ -102,7 +104,7 @@ template <class Impl>
void
BaseO3DynInst<Impl>::initVars()
{
this->_readySrcRegIdx.reset();
this->regs.init();
_numDestMiscRegs = 0;

View File

@@ -1436,12 +1436,12 @@ DefaultIEW<Impl>::writebackInsts()
for (int i = 0; i < inst->numDestRegs(); i++) {
// Mark register as ready if not pinned
if (inst->renamedDestRegIdx(i)->
if (inst->regs.renamedDestIdx(i)->
getNumPinnedWritesToComplete() == 0) {
DPRINTF(IEW,"Setting Destination Register %i (%s)\n",
inst->renamedDestRegIdx(i)->index(),
inst->renamedDestRegIdx(i)->className());
scoreboard->setReg(inst->renamedDestRegIdx(i));
inst->regs.renamedDestIdx(i)->index(),
inst->regs.renamedDestIdx(i)->className());
scoreboard->setReg(inst->regs.renamedDestIdx(i));
}
}

View File

@@ -1004,7 +1004,7 @@ InstructionQueue<Impl>::wakeDependents(const DynInstPtr &completed_inst)
dest_reg_idx++)
{
PhysRegIdPtr dest_reg =
completed_inst->renamedDestRegIdx(dest_reg_idx);
completed_inst->regs.renamedDestIdx(dest_reg_idx);
// Special case of uniq or control registers. They are not
// handled by the IQ and thus have no dependency graph entry.
@@ -1242,7 +1242,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
src_reg_idx++)
{
PhysRegIdPtr src_reg =
squashed_inst->renamedSrcRegIdx(src_reg_idx);
squashed_inst->regs.renamedSrcIdx(src_reg_idx);
// Only remove it from the dependency graph if it
// was placed there in the first place.
@@ -1253,7 +1253,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
// overwritten. The only downside to this is it
// leaves more room for error.
if (!squashed_inst->isReadySrcRegIdx(src_reg_idx) &&
if (!squashed_inst->regs.readySrcIdx(src_reg_idx) &&
!src_reg->isFixedMapping()) {
dependGraph.remove(src_reg->flatIndex(),
squashed_inst);
@@ -1315,7 +1315,7 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
dest_reg_idx++)
{
PhysRegIdPtr dest_reg =
squashed_inst->renamedDestRegIdx(dest_reg_idx);
squashed_inst->regs.renamedDestIdx(dest_reg_idx);
if (dest_reg->isFixedMapping()){
continue;
}
@@ -1341,8 +1341,8 @@ InstructionQueue<Impl>::addToDependents(const DynInstPtr &new_inst)
src_reg_idx++)
{
// Only add it to the dependency graph if it's not ready.
if (!new_inst->isReadySrcRegIdx(src_reg_idx)) {
PhysRegIdPtr src_reg = new_inst->renamedSrcRegIdx(src_reg_idx);
if (!new_inst->regs.readySrcIdx(src_reg_idx)) {
PhysRegIdPtr src_reg = new_inst->regs.renamedSrcIdx(src_reg_idx);
// Check the IQ's scoreboard to make sure the register
// hasn't become ready while the instruction was in flight
@@ -1389,7 +1389,7 @@ InstructionQueue<Impl>::addToProducers(const DynInstPtr &new_inst)
dest_reg_idx < total_dest_regs;
dest_reg_idx++)
{
PhysRegIdPtr dest_reg = new_inst->renamedDestRegIdx(dest_reg_idx);
PhysRegIdPtr dest_reg = new_inst->regs.renamedDestIdx(dest_reg_idx);
// Some registers have fixed mapping, and there is no need to track
// dependencies as these instructions must be executed at commit.

View File

@@ -243,7 +243,7 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
if (!src_reg.isMiscReg() &&
!src_reg.isZeroReg()) {
// Get the physical register index of the i'th source register.
PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcRegIdx(src_idx);
PhysRegIdPtr phys_src_reg = dyn_inst->regs.renamedSrcIdx(src_idx);
DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
" %i (%s)\n", seq_num,
phys_src_reg->flatIndex(), phys_src_reg->className());
@@ -276,7 +276,8 @@ ElasticTrace::updateRegDep(const DynInstConstPtr& dyn_inst)
!dest_reg.isZeroReg()) {
// Get the physical register index of the i'th destination
// register.
PhysRegIdPtr phys_dest_reg = dyn_inst->renamedDestRegIdx(dest_idx);
PhysRegIdPtr phys_dest_reg =
dyn_inst->regs.renamedDestIdx(dest_idx);
DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
" %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
dest_reg.className());

View File

@@ -1108,7 +1108,7 @@ DefaultRename<Impl>::renameDestRegs(const DynInstPtr &inst, ThreadID tid)
rename_result = map->rename(flat_dest_regid);
inst->flattenDestReg(dest_idx, flat_dest_regid);
inst->regs.flattenedDestIdx(dest_idx, flat_dest_regid);
scoreboard->unsetReg(rename_result.first);