arch-gcn3, gpu-compute: Update getRegisterIndex() API

This change removes the GPUDynInstPtr argument from
getRegisterIndex(). The dynamic instruction was only
needed to reach its parent wavefront's state in order to
determine how many scalar registers the wavefront was
allocated. Instead, the number of scalar registers is now
passed directly (as num_scalar_regs), which cuts down on
shared pointer usage.

Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42210
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Tony Gutierrez
2019-03-07 19:26:10 -05:00
committed by Matt Sinclair
parent 236b4a502f
commit 0e2564a629
10 changed files with 86 additions and 120 deletions

View File

@@ -70,7 +70,7 @@ namespace Gcn3ISA
int getOperandSize(int opIdx) override { return 0; }
int
getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
getRegisterIndex(int opIdx, int num_scalar_regs) override
{
return 0;
}

View File

@@ -128,21 +128,18 @@ namespace Gcn3ISA
}
int
Inst_SOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
switch (opIdx) {
case 0:
return opSelectorToRegIdx(instData.SSRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
case 1:
return opSelectorToRegIdx(instData.SSRC1,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
case 2:
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -244,7 +241,7 @@ namespace Gcn3ISA
}
int
Inst_SOPK::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SOPK::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -253,8 +250,7 @@ namespace Gcn3ISA
case 0:
return -1;
case 1:
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -349,7 +345,7 @@ namespace Gcn3ISA
}
int
Inst_SOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -359,14 +355,11 @@ namespace Gcn3ISA
if (instData.OP == 0x1C) {
// Special case for s_getpc, which has no source reg.
// Instead, it implicitly reads the PC.
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
}
return opSelectorToRegIdx(instData.SSRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
case 1:
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -467,18 +460,16 @@ namespace Gcn3ISA
}
int
Inst_SOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
switch (opIdx) {
case 0:
return opSelectorToRegIdx(instData.SSRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
case 1:
return opSelectorToRegIdx(instData.SSRC1,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -583,7 +574,7 @@ namespace Gcn3ISA
}
int
Inst_SOPP::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SOPP::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -691,7 +682,7 @@ namespace Gcn3ISA
}
int
Inst_SMEM::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_SMEM::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -700,8 +691,7 @@ namespace Gcn3ISA
case 0:
// SBASE has an implied LSB of 0, so we need
// to shift by one to get the actual value
return opSelectorToRegIdx(instData.SBASE << 1,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SBASE << 1, num_scalar_regs);
case 1:
if (instData.IMM) {
// operand is an immediate value, not a register
@@ -710,8 +700,7 @@ namespace Gcn3ISA
return extData.OFFSET;
}
case 2:
return opSelectorToRegIdx(instData.SDATA,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDATA, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -901,21 +890,19 @@ namespace Gcn3ISA
}
int
Inst_VOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_VOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
switch (opIdx) {
case 0:
return opSelectorToRegIdx(instData.SRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
case 1:
return instData.VSRC1;
case 2:
if (readsVCC()) {
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
} else {
return instData.VDST;
}
@@ -924,13 +911,11 @@ namespace Gcn3ISA
if (readsVCC()) {
return instData.VDST;
} else {
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
}
case 4:
assert(writesVCC() && readsVCC());
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -1044,15 +1029,14 @@ namespace Gcn3ISA
}
int
Inst_VOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_VOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
switch (opIdx) {
case 0:
return opSelectorToRegIdx(instData.SRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
case 1:
return instData.VDST;
default:
@@ -1066,6 +1050,7 @@ namespace Gcn3ISA
Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode)
: GCN3GPUStaticInst(opcode)
{
setFlag(WritesVCC);
// copy first instruction DWORD
instData = iFmt[0];
if (hasSecondDword(iFmt)) {
@@ -1167,21 +1152,19 @@ namespace Gcn3ISA
}
int
Inst_VOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_VOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
switch (opIdx) {
case 0:
return opSelectorToRegIdx(instData.SRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
case 1:
return instData.VSRC1;
case 2:
// VCC
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -1437,7 +1420,7 @@ namespace Gcn3ISA
}
int
Inst_VOP3::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_VOP3::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -1445,20 +1428,17 @@ namespace Gcn3ISA
switch (opIdx) {
case 0:
// SRC0
return opSelectorToRegIdx(extData.SRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
case 1:
if (numSrcRegOperands() > 1) {
// if we have more than 1 source operand then
// op index 1 corresponds to SRC1
return opSelectorToRegIdx(extData.SRC1,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
} else {
// if we only have 1 source operand, opIdx 1
// will be VDST
if (sgprDst) {
return opSelectorToRegIdx(instData.VDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
}
return instData.VDST;
}
@@ -1468,15 +1448,13 @@ namespace Gcn3ISA
// op index 2 corresponds to SRC2. SRC2 may be
// a scalar or vector register, an inline
// constant, or a special HW register
return opSelectorToRegIdx(extData.SRC2,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
} else if (numSrcRegOperands() == 2) {
// if we only have 2 source operands, opIdx 2
// will be VDST, and VDST is always a vector
// reg
if (sgprDst) {
return opSelectorToRegIdx(instData.VDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
}
return instData.VDST;
} else {
@@ -1484,31 +1462,27 @@ namespace Gcn3ISA
// VDST then it must be a VCC read or write,
// and VCC is never stored in a VGPR
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
}
case 3:
if (numSrcRegOperands() == 3) {
// if we have 3 source operands then op
// idx 3 will correspond to VDST
if (sgprDst) {
return opSelectorToRegIdx(instData.VDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
}
return instData.VDST;
} else {
// if this idx doesn't correspond to VDST
// then it must be a VCC read or write
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
}
case 4:
// if a VOP3 instruction has more than 4 ops
// it must read from and write to VCC
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(REG_VCC_LO,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -1731,7 +1705,7 @@ namespace Gcn3ISA
}
int
Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -1739,14 +1713,12 @@ namespace Gcn3ISA
switch (opIdx) {
case 0:
// SRC0
return opSelectorToRegIdx(extData.SRC0,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
case 1:
if (numSrcRegOperands() > 1) {
// if we have more than 1 source operand then
// op index 1 corresponds to SRC1
return opSelectorToRegIdx(extData.SRC1,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
} else {
// if we only have 1 source operand, opIdx 1
// will be VDST
@@ -1756,8 +1728,7 @@ namespace Gcn3ISA
if (numSrcRegOperands() > 2) {
// if we have more than 2 source operand then
// op index 2 corresponds to SRC2
return opSelectorToRegIdx(extData.SRC2,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
} else if (numSrcRegOperands() == 2) {
// if we only have 2 source operands, opIdx 2
// will be VDST
@@ -1766,8 +1737,7 @@ namespace Gcn3ISA
// if this idx doesn't correspond to SRCX or
// VDST then it must be a VCC read or write
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
}
case 3:
if (numSrcRegOperands() == 3) {
@@ -1778,15 +1748,13 @@ namespace Gcn3ISA
// if this idx doesn't correspond to VDST
// then it must be a VCC read or write
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
}
case 4:
// if a VOP3 instruction has more than 4 ops
// it must read from and write to VCC
assert(writesVCC() || readsVCC());
return opSelectorToRegIdx(instData.SDST,
gpuDynInst->wavefront()->reservedScalarRegs);
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
default:
fatal("Operand at idx %i does not exist\n", opIdx);
return -1;
@@ -1871,7 +1839,7 @@ namespace Gcn3ISA
}
int
Inst_DS::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_DS::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -1993,7 +1961,7 @@ namespace Gcn3ISA
}
int
Inst_MUBUF::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_MUBUF::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());
@@ -2170,7 +2138,7 @@ namespace Gcn3ISA
}
int
Inst_FLAT::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
Inst_FLAT::getRegisterIndex(int opIdx, int num_scalar_regs)
{
assert(opIdx >= 0);
assert(opIdx < getNumOperands());

View File

@@ -79,7 +79,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -103,7 +103,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -127,7 +127,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -151,7 +151,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -175,7 +175,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -193,7 +193,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
/**
@@ -276,7 +276,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -300,7 +300,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -324,7 +324,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -361,7 +361,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -395,7 +395,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
// first instruction DWORD
@@ -418,7 +418,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>
@@ -518,7 +518,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>
@@ -649,7 +649,7 @@ namespace Gcn3ISA
* non-formatted accesses, this is done on a per-lane
* basis.
*/
if (stride == 0 || !rsrc_desc.swizzleEn) {
if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
if (buf_off + stride * buf_idx >=
rsrc_desc.numRecords - s_offset.rawData()) {
DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
@@ -657,13 +657,13 @@ namespace Gcn3ISA
"const_stride = %llx, "
"const_num_records = %llx\n",
lane, buf_off + stride * buf_idx,
stride, rsrc_desc.numRecords);
rsrc_desc.stride, rsrc_desc.numRecords);
oobMask.set(lane);
continue;
}
}
if (stride != 0 && rsrc_desc.swizzleEn) {
if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
if (buf_idx >= rsrc_desc.numRecords ||
buf_off >= stride) {
DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
@@ -776,7 +776,7 @@ namespace Gcn3ISA
bool isScalarRegister(int opIdx) override;
bool isVectorRegister(int opIdx) override;
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
protected:
template<typename T>

View File

@@ -557,7 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts()
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
gpu_dyn_inst->initOperandInfo();
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
@@ -598,7 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
wavefront, gpu_static_inst,
wavefront->computeUnit->
getAndIncSeqNum());
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
gpu_dyn_inst->initOperandInfo();
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "

View File

@@ -85,9 +85,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
}
void
GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
GPUDynInst::initOperandInfo()
{
assert(gpu_dyn_inst->wavefront());
/**
* Generate and cache the operand to register mapping information. This
* prevents this info from being generated multiple times throughout
@@ -102,7 +101,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
int op_num_dwords(-1);
if (isVectorRegister(op_idx)) {
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
virt_idx = getRegisterIndex(op_idx);
op_num_dwords = numOpdDWORDs(op_idx);
if (isSrcOperand(op_idx)) {
@@ -143,7 +142,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
phys_indices);
}
} else if (isScalarRegister(op_idx)) {
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
virt_idx = getRegisterIndex(op_idx);
op_num_dwords = numOpdDWORDs(op_idx);
if (isSrcOperand(op_idx)) {
@@ -287,9 +286,9 @@ GPUDynInst::isScalarRegister(int operandIdx)
}
int
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
GPUDynInst::getRegisterIndex(int operandIdx)
{
return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
}
int

View File

@@ -117,7 +117,7 @@ class GPUDynInst : public GPUExecContext
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
uint64_t instSeqNum);
~GPUDynInst();
void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
void initOperandInfo();
void execute(GPUDynInstPtr gpuDynInst);
const std::vector<RegisterOperandInfo>&
@@ -159,7 +159,7 @@ class GPUDynInst : public GPUExecContext
int getNumOperands();
bool isVectorRegister(int operandIdx);
bool isScalarRegister(int operandIdx);
int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
int getRegisterIndex(int operandIdx);
int getOperandSize(int operandIdx);
bool isDstOperand(int operandIdx);
bool isSrcOperand(int operandIdx);

View File

@@ -86,8 +86,7 @@ class GPUStaticInst : public GPUStaticInstFlags
virtual bool isExecMaskRegister(int opIdx) = 0;
virtual int getOperandSize(int operandIndex) = 0;
virtual int getRegisterIndex(int operandIndex,
GPUDynInstPtr gpuDynInst) = 0;
virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0;
virtual int numDstRegOperands() = 0;
virtual int numSrcRegOperands() = 0;
@@ -310,7 +309,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
int getOperandSize(int operandIndex) override { return 0; }
int
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
getRegisterIndex(int operandIndex, int num_scalar_regs) override
{
return 0;
}

View File

@@ -53,7 +53,7 @@ ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i, ii);
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
@@ -84,7 +84,7 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i, ii);
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
@@ -116,7 +116,7 @@ ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i, ii);
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1
: ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; j++) {
@@ -139,7 +139,7 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w,
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
int sgprIdx = ii->getRegisterIndex(i, ii);
int sgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;

View File

@@ -59,7 +59,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
{
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i, ii);
int vgprIdx = ii->getRegisterIndex(i);
// determine number of registers
int nRegs =
@@ -89,7 +89,7 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
// iterate over all register destination operands
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i, ii);
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
@@ -150,7 +150,7 @@ VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i, ii);
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1
: ii->getOperandSize(i) / 4;
for (int j = 0; j < nRegs; j++) {
@@ -183,7 +183,7 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad(
assert(ii->isLoad() || ii->isAtomicRet());
for (int i = 0; i < ii->getNumOperands(); ++i) {
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
int vgprIdx = ii->getRegisterIndex(i, ii);
int vgprIdx = ii->getRegisterIndex(i);
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;

View File

@@ -927,7 +927,7 @@ Wavefront::exec()
// vector RAW dependency tracking
for (int i = 0; i < ii->getNumOperands(); i++) {
if (ii->isVectorRegister(i)) {
int vgpr = ii->getRegisterIndex(i, ii);
int vgpr = ii->getRegisterIndex(i);
int nReg = ii->getOperandSize(i) <= 4 ? 1 :
ii->getOperandSize(i) / 4;
for (int n = 0; n < nReg; n++) {