gpu-compute: Add operand info class to GPUDynInst
This change adds a class that stores operand register info for the GPUDynInst. The operand info is calculated when the instruction object is created and stored for easy access by the RF, etc. Change-Id: I3cf267942e54fe60fcb4224d3b88da08a1a0226e Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42209 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matt Sinclair
parent
9ddfe09649
commit
236b4a502f
@@ -168,6 +168,12 @@ namespace Gcn3ISA
|
||||
typedef int64_t VecElemI64;
|
||||
typedef double VecElemF64;
|
||||
|
||||
/**
 * Size of one DWORD in bytes, defined as the size of a VecElemU32.
 */
constexpr int DWORDSize = sizeof(VecElemU32);
/**
 * Size of a single-precision register in DWORDs.
 */
constexpr int RegSizeDWORDs = sizeof(VecElemU32) / DWORDSize;
|
||||
|
||||
// typedefs for the various sizes/types of vector regs
|
||||
using VecRegU8 = ::VecRegT<VecElemU8, NumVecElemPerVecReg, false>;
|
||||
using VecRegI8 = ::VecRegT<VecElemI8, NumVecElemPerVecReg, false>;
|
||||
|
||||
@@ -80,6 +80,7 @@ DebugFlag('GPUInitAbi')
|
||||
DebugFlag('GPUDisp')
|
||||
DebugFlag('GPUExec')
|
||||
DebugFlag('GPUFetch')
|
||||
DebugFlag('GPUInst')
|
||||
DebugFlag('GPUKernelInfo')
|
||||
DebugFlag('GPUMem')
|
||||
DebugFlag('GPUPort')
|
||||
|
||||
@@ -557,6 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
|
||||
@@ -597,6 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
|
||||
#include "gpu-compute/gpu_dyn_inst.hh"
|
||||
|
||||
#include "debug/GPUInst.hh"
|
||||
#include "debug/GPUMem.hh"
|
||||
#include "gpu-compute/gpu_static_inst.hh"
|
||||
#include "gpu-compute/scalar_register_file.hh"
|
||||
@@ -43,7 +44,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
GPUStaticInst *static_inst, InstSeqNum instSeqNum)
|
||||
: GPUExecContext(_cu, _wf), scalarAddr(0), addr(computeUnit()->wfSize(),
|
||||
(Addr)0), numScalarReqs(0), isSaveRestore(false),
|
||||
_staticInst(static_inst), _seqNum(instSeqNum)
|
||||
_staticInst(static_inst), _seqNum(instSeqNum),
|
||||
maxSrcVecRegOpSize(0), maxSrcScalarRegOpSize(0)
|
||||
{
|
||||
statusVector.assign(TheGpuISA::NumVecElemPerVecReg, 0);
|
||||
tlbHitLevel.assign(computeUnit()->wfSize(), -1);
|
||||
@@ -82,6 +84,109 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
{
|
||||
assert(gpu_dyn_inst->wavefront());
|
||||
/**
|
||||
* Generate and cache the operand to register mapping information. This
|
||||
* prevents this info from being generated multiple times throughout
|
||||
* the CU pipeline.
|
||||
*/
|
||||
DPRINTF(GPUInst, "%s: generating operand info for %d operands\n",
|
||||
disassemble(), getNumOperands());
|
||||
|
||||
for (int op_idx = 0; op_idx < getNumOperands(); ++op_idx) {
|
||||
int virt_idx(-1);
|
||||
int phys_idx(-1);
|
||||
int op_num_dwords(-1);
|
||||
|
||||
if (isVectorRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
|
||||
if (op_num_dwords > maxSrcVecRegOpSize) {
|
||||
maxSrcVecRegOpSize = op_num_dwords;
|
||||
}
|
||||
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding vector src (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
srcVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
|
||||
phys_indices);
|
||||
} else {
|
||||
assert(isDstOperand(op_idx));
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding vector dst (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapVgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
dstVecRegOps.emplace_back(op_idx, op_num_dwords, virt_indices,
|
||||
phys_indices);
|
||||
}
|
||||
} else if (isScalarRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
|
||||
if (op_num_dwords > maxSrcScalarRegOpSize) {
|
||||
maxSrcScalarRegOpSize = op_num_dwords;
|
||||
}
|
||||
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding scalar src (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
srcScalarRegOps.emplace_back(op_idx, op_num_dwords,
|
||||
virt_indices, phys_indices);
|
||||
} else {
|
||||
assert(isDstOperand(op_idx));
|
||||
std::vector<int> virt_indices;
|
||||
std::vector<int> phys_indices;
|
||||
for (int i = 0; i < op_num_dwords; ++i) {
|
||||
phys_idx = computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx + i);
|
||||
virt_indices.push_back(virt_idx + i);
|
||||
phys_indices.push_back(phys_idx);
|
||||
}
|
||||
DPRINTF(GPUInst, "%s adding scalar dst (%d->%d) operand "
|
||||
"that uses %d registers.\n", disassemble(),
|
||||
virt_idx, computeUnit()->registerManager->
|
||||
mapSgpr(wavefront(), virt_idx), op_num_dwords);
|
||||
dstScalarRegOps.emplace_back(op_idx, op_num_dwords,
|
||||
virt_indices, phys_indices);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GPUDynInst::~GPUDynInst()
|
||||
{
|
||||
delete[] d_data;
|
||||
@@ -110,15 +215,39 @@ GPUDynInst::numDstRegOperands()
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcVecOperands()
|
||||
GPUDynInst::numSrcVecRegOperands() const
|
||||
{
|
||||
return _staticInst->numSrcVecOperands();
|
||||
return srcVecRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstVecOperands()
|
||||
GPUDynInst::numDstVecRegOperands() const
|
||||
{
|
||||
return _staticInst->numDstVecOperands();
|
||||
return dstVecRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::maxSrcVecRegOperandSize() const
|
||||
{
|
||||
return maxSrcVecRegOpSize;
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numSrcScalarRegOperands() const
|
||||
{
|
||||
return srcScalarRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::numDstScalarRegOperands() const
|
||||
{
|
||||
return dstScalarRegOps.size();
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::maxSrcScalarRegOperandSize() const
|
||||
{
|
||||
return maxSrcScalarRegOpSize;
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -74,17 +74,85 @@ class AtomicOpCAS : public TypedAtomicOpFunctor<T>
|
||||
AtomicOpFunctor* clone () { return new AtomicOpCAS(c, s, computeUnit); }
|
||||
};
|
||||
|
||||
class RegisterOperandInfo
|
||||
{
|
||||
public:
|
||||
RegisterOperandInfo() = delete;
|
||||
RegisterOperandInfo(int op_idx, int num_dwords,
|
||||
const std::vector<int> &virt_indices,
|
||||
const std::vector<int> &phys_indices)
|
||||
: opIdx(op_idx), numDWORDs(num_dwords), virtIndices(virt_indices),
|
||||
physIndices(phys_indices)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* The number of registers required to store this operand.
|
||||
*/
|
||||
int numRegisters() const { return numDWORDs / TheGpuISA::RegSizeDWORDs; }
|
||||
int operandIdx() const { return opIdx; }
|
||||
/**
|
||||
* We typically only need the first virtual register for the operand
|
||||
* regardless of its size.
|
||||
*/
|
||||
int virtIdx(int reg_num=0) const { return virtIndices.at(reg_num); }
|
||||
|
||||
private:
|
||||
/**
|
||||
* Index of this operand within the set of its parent instruction's
|
||||
* operand list.
|
||||
*/
|
||||
const int opIdx;
|
||||
/**
|
||||
* Size of this operand in DWORDs.
|
||||
*/
|
||||
const int numDWORDs;
|
||||
const std::vector<int> virtIndices;
|
||||
const std::vector<int> physIndices;
|
||||
};
|
||||
|
||||
class GPUDynInst : public GPUExecContext
|
||||
{
|
||||
public:
|
||||
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
|
||||
uint64_t instSeqNum);
|
||||
~GPUDynInst();
|
||||
void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
srcVecRegOperands() const
|
||||
{
|
||||
return srcVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
dstVecRegOperands() const
|
||||
{
|
||||
return dstVecRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
srcScalarRegOperands() const
|
||||
{
|
||||
return srcScalarRegOps;
|
||||
}
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
dstScalarRegOperands() const
|
||||
{
|
||||
return dstScalarRegOps;
|
||||
}
|
||||
|
||||
int numSrcVecRegOperands() const;
|
||||
int numDstVecRegOperands() const;
|
||||
int maxSrcVecRegOperandSize() const;
|
||||
int numSrcScalarRegOperands() const;
|
||||
int numDstScalarRegOperands() const;
|
||||
int maxSrcScalarRegOperandSize() const;
|
||||
|
||||
int numSrcRegOperands();
|
||||
int numDstRegOperands();
|
||||
int numDstVecOperands();
|
||||
int numSrcVecOperands();
|
||||
int numSrcVecDWORDs();
|
||||
int numDstVecDWORDs();
|
||||
int numOpdDWORDs(int operandIdx);
|
||||
@@ -428,6 +496,8 @@ class GPUDynInst : public GPUExecContext
|
||||
private:
|
||||
GPUStaticInst *_staticInst;
|
||||
const InstSeqNum _seqNum;
|
||||
int maxSrcVecRegOpSize;
|
||||
int maxSrcScalarRegOpSize;
|
||||
|
||||
// the time the request was started
|
||||
Tick accessTime = -1;
|
||||
@@ -439,6 +509,12 @@ class GPUDynInst : public GPUExecContext
|
||||
// hold each cache block address for the instruction and a vector
|
||||
// to hold the tick when the block arrives at certain hop points
|
||||
std::map<Addr, std::vector<Tick>> lineAddressTime;
|
||||
|
||||
// Operand info.
|
||||
std::vector<RegisterOperandInfo> srcVecRegOps;
|
||||
std::vector<RegisterOperandInfo> dstVecRegOps;
|
||||
std::vector<RegisterOperandInfo> srcScalarRegOps;
|
||||
std::vector<RegisterOperandInfo> dstScalarRegOps;
|
||||
};
|
||||
|
||||
#endif // __GPU_DYN_INST_HH__
|
||||
|
||||
@@ -904,8 +904,8 @@ Wavefront::exec()
|
||||
}
|
||||
computeUnit->srf[simdId]->waveExecuteInst(this, ii);
|
||||
|
||||
computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecOperands());
|
||||
computeUnit->shader->incVectorInstDstOperand(ii->numDstVecOperands());
|
||||
computeUnit->shader->incVectorInstSrcOperand(ii->numSrcVecRegOperands());
|
||||
computeUnit->shader->incVectorInstDstOperand(ii->numDstVecRegOperands());
|
||||
computeUnit->stats.numInstrExecuted++;
|
||||
stats.numInstrExecuted++;
|
||||
computeUnit->instExecPerSimd[simdId]++;
|
||||
|
||||
Reference in New Issue
Block a user