arch-gcn3, gpu-compute: Update getRegisterIndex() API
This change removes the GPUDynInstPtr argument from getRegisterIndex(). The dynamic instruction was needed only to reach its parent wavefront's (WF's) state in order to determine how many scalar registers the wavefront had been allocated. Since that count is all that is required, the number of scalar registers is now passed directly, which cuts down on shared pointer usage. Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42210 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matt Sinclair
parent
236b4a502f
commit
0e2564a629
@@ -557,7 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
|
||||
@@ -598,7 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
|
||||
|
||||
@@ -85,9 +85,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
}
|
||||
|
||||
void
|
||||
GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
GPUDynInst::initOperandInfo()
|
||||
{
|
||||
assert(gpu_dyn_inst->wavefront());
|
||||
/**
|
||||
* Generate and cache the operand to register mapping information. This
|
||||
* prevents this info from being generated multiple times throughout
|
||||
@@ -102,7 +101,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
int op_num_dwords(-1);
|
||||
|
||||
if (isVectorRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
@@ -143,7 +142,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
phys_indices);
|
||||
}
|
||||
} else if (isScalarRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
@@ -287,9 +286,9 @@ GPUDynInst::isScalarRegister(int operandIdx)
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
|
||||
GPUDynInst::getRegisterIndex(int operandIdx)
|
||||
{
|
||||
return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
|
||||
return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -117,7 +117,7 @@ class GPUDynInst : public GPUExecContext
|
||||
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
|
||||
uint64_t instSeqNum);
|
||||
~GPUDynInst();
|
||||
void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
|
||||
void initOperandInfo();
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
@@ -159,7 +159,7 @@ class GPUDynInst : public GPUExecContext
|
||||
int getNumOperands();
|
||||
bool isVectorRegister(int operandIdx);
|
||||
bool isScalarRegister(int operandIdx);
|
||||
int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
|
||||
int getRegisterIndex(int operandIdx);
|
||||
int getOperandSize(int operandIdx);
|
||||
bool isDstOperand(int operandIdx);
|
||||
bool isSrcOperand(int operandIdx);
|
||||
|
||||
@@ -86,8 +86,7 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||
virtual bool isExecMaskRegister(int opIdx) = 0;
|
||||
virtual int getOperandSize(int operandIndex) = 0;
|
||||
|
||||
virtual int getRegisterIndex(int operandIndex,
|
||||
GPUDynInstPtr gpuDynInst) = 0;
|
||||
virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0;
|
||||
|
||||
virtual int numDstRegOperands() = 0;
|
||||
virtual int numSrcRegOperands() = 0;
|
||||
@@ -310,7 +309,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
|
||||
int getOperandSize(int operandIndex) override { return 0; }
|
||||
|
||||
int
|
||||
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
|
||||
getRegisterIndex(int operandIndex, int num_scalar_regs) override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -84,7 +84,7 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -116,7 +116,7 @@ ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
@@ -139,7 +139,7 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w,
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
{
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
|
||||
// determine number of registers
|
||||
int nRegs =
|
||||
@@ -89,7 +89,7 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
// iterate over all register destination operands
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -150,7 +150,7 @@ VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
@@ -183,7 +183,7 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad(
|
||||
assert(ii->isLoad() || ii->isAtomicRet());
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
|
||||
@@ -927,7 +927,7 @@ Wavefront::exec()
|
||||
// vector RAW dependency tracking
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i)) {
|
||||
int vgpr = ii->getRegisterIndex(i, ii);
|
||||
int vgpr = ii->getRegisterIndex(i);
|
||||
int nReg = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
for (int n = 0; n < nReg; n++) {
|
||||
|
||||
Reference in New Issue
Block a user