arch-gcn3, gpu-compute: Update getRegisterIndex() API
This change removes the GPUDynInstPtr argument from getRegisterIndex(). The dynamic instruction was only needed to reach its parent wavefront's state in order to determine how many scalar registers the wavefront was allocated. Instead, the number of scalar registers is now passed directly, which cuts down on shared pointer usage. GPUDynInst::initOperandInfo() likewise no longer takes a GPUDynInstPtr argument. Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42210 Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
committed by
Matt Sinclair
parent
236b4a502f
commit
0e2564a629
@@ -70,7 +70,7 @@ namespace Gcn3ISA
|
||||
int getOperandSize(int opIdx) override { return 0; }
|
||||
|
||||
int
|
||||
getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override
|
||||
getRegisterIndex(int opIdx, int num_scalar_regs) override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -128,21 +128,18 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
return opSelectorToRegIdx(instData.SSRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return opSelectorToRegIdx(instData.SSRC1,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
|
||||
case 2:
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -244,7 +241,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SOPK::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SOPK::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -253,8 +250,7 @@ namespace Gcn3ISA
|
||||
case 0:
|
||||
return -1;
|
||||
case 1:
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -349,7 +345,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -359,14 +355,11 @@ namespace Gcn3ISA
|
||||
if (instData.OP == 0x1C) {
|
||||
// Special case for s_getpc, which has no source reg.
|
||||
// Instead, it implicitly reads the PC.
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
}
|
||||
return opSelectorToRegIdx(instData.SSRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -467,18 +460,16 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
return opSelectorToRegIdx(instData.SSRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return opSelectorToRegIdx(instData.SSRC1,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -583,7 +574,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SOPP::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SOPP::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -691,7 +682,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_SMEM::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_SMEM::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -700,8 +691,7 @@ namespace Gcn3ISA
|
||||
case 0:
|
||||
// SBASE has an implied LSB of 0, so we need
|
||||
// to shift by one to get the actual value
|
||||
return opSelectorToRegIdx(instData.SBASE << 1,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SBASE << 1, num_scalar_regs);
|
||||
case 1:
|
||||
if (instData.IMM) {
|
||||
// operand is an immediate value, not a register
|
||||
@@ -710,8 +700,7 @@ namespace Gcn3ISA
|
||||
return extData.OFFSET;
|
||||
}
|
||||
case 2:
|
||||
return opSelectorToRegIdx(instData.SDATA,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDATA, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -901,21 +890,19 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_VOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP2::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
return opSelectorToRegIdx(instData.SRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return instData.VSRC1;
|
||||
case 2:
|
||||
if (readsVCC()) {
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
} else {
|
||||
return instData.VDST;
|
||||
}
|
||||
@@ -924,13 +911,11 @@ namespace Gcn3ISA
|
||||
if (readsVCC()) {
|
||||
return instData.VDST;
|
||||
} else {
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
}
|
||||
case 4:
|
||||
assert(writesVCC() && readsVCC());
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -1044,15 +1029,14 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_VOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP1::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
return opSelectorToRegIdx(instData.SRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return instData.VDST;
|
||||
default:
|
||||
@@ -1066,6 +1050,7 @@ namespace Gcn3ISA
|
||||
Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode)
|
||||
: GCN3GPUStaticInst(opcode)
|
||||
{
|
||||
setFlag(WritesVCC);
|
||||
// copy first instruction DWORD
|
||||
instData = iFmt[0];
|
||||
if (hasSecondDword(iFmt)) {
|
||||
@@ -1167,21 +1152,19 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_VOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOPC::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
return opSelectorToRegIdx(instData.SRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SRC0, num_scalar_regs);
|
||||
case 1:
|
||||
return instData.VSRC1;
|
||||
case 2:
|
||||
// VCC
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -1437,7 +1420,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_VOP3::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -1445,20 +1428,17 @@ namespace Gcn3ISA
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
// SRC0
|
||||
return opSelectorToRegIdx(extData.SRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
|
||||
case 1:
|
||||
if (numSrcRegOperands() > 1) {
|
||||
// if we have more than 1 source operand then
|
||||
// op index 1 corresponds to SRC1
|
||||
return opSelectorToRegIdx(extData.SRC1,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
|
||||
} else {
|
||||
// if we only have 1 source operand, opIdx 1
|
||||
// will be VDST
|
||||
if (sgprDst) {
|
||||
return opSelectorToRegIdx(instData.VDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
|
||||
}
|
||||
return instData.VDST;
|
||||
}
|
||||
@@ -1468,15 +1448,13 @@ namespace Gcn3ISA
|
||||
// op index 2 corresponds to SRC2. SRC2 may be
|
||||
// a scalar or vector register, an inline
|
||||
// constant, or a special HW register
|
||||
return opSelectorToRegIdx(extData.SRC2,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
|
||||
} else if (numSrcRegOperands() == 2) {
|
||||
// if we only have 2 source operands, opIdx 2
|
||||
// will be VDST; it is a vector reg unless sgprDst
|
||||
// is set (then it goes through opSelectorToRegIdx)
|
||||
if (sgprDst) {
|
||||
return opSelectorToRegIdx(instData.VDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
|
||||
}
|
||||
return instData.VDST;
|
||||
} else {
|
||||
@@ -1484,31 +1462,27 @@ namespace Gcn3ISA
|
||||
// VDST then it must be a VCC read or write,
|
||||
// and VCC is never stored in a VGPR
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
}
|
||||
case 3:
|
||||
if (numSrcRegOperands() == 3) {
|
||||
// if we have 3 source operands then op
|
||||
// idx 3 will correspond to VDST
|
||||
if (sgprDst) {
|
||||
return opSelectorToRegIdx(instData.VDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.VDST, num_scalar_regs);
|
||||
}
|
||||
return instData.VDST;
|
||||
} else {
|
||||
// if this idx doesn't correspond to VDST
|
||||
// then it must be a VCC read or write
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
}
|
||||
case 4:
|
||||
// if a VOP3 instruction has more than 4 ops
|
||||
// it must read from and write to VCC
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(REG_VCC_LO,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -1731,7 +1705,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -1739,14 +1713,12 @@ namespace Gcn3ISA
|
||||
switch (opIdx) {
|
||||
case 0:
|
||||
// SRC0
|
||||
return opSelectorToRegIdx(extData.SRC0,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC0, num_scalar_regs);
|
||||
case 1:
|
||||
if (numSrcRegOperands() > 1) {
|
||||
// if we have more than 1 source operand then
|
||||
// op index 1 corresponds to SRC1
|
||||
return opSelectorToRegIdx(extData.SRC1,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC1, num_scalar_regs);
|
||||
} else {
|
||||
// if we only have 1 source operand, opIdx 1
|
||||
// will be VDST
|
||||
@@ -1756,8 +1728,7 @@ namespace Gcn3ISA
|
||||
if (numSrcRegOperands() > 2) {
|
||||
// if we have more than 2 source operand then
|
||||
// op index 2 corresponds to SRC2
|
||||
return opSelectorToRegIdx(extData.SRC2,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(extData.SRC2, num_scalar_regs);
|
||||
} else if (numSrcRegOperands() == 2) {
|
||||
// if we only have 2 source operands, opIdx 2
|
||||
// will be VDST
|
||||
@@ -1766,8 +1737,7 @@ namespace Gcn3ISA
|
||||
// if this idx doesn't correspond to SRCX or
|
||||
// VDST then it must be a VCC read or write
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
}
|
||||
case 3:
|
||||
if (numSrcRegOperands() == 3) {
|
||||
@@ -1778,15 +1748,13 @@ namespace Gcn3ISA
|
||||
// if this idx doesn't correspond to VDST
|
||||
// then it must be a VCC read or write
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
}
|
||||
case 4:
|
||||
// if a VOP3 instruction has more than 4 ops
|
||||
// it must read from and write to VCC
|
||||
assert(writesVCC() || readsVCC());
|
||||
return opSelectorToRegIdx(instData.SDST,
|
||||
gpuDynInst->wavefront()->reservedScalarRegs);
|
||||
return opSelectorToRegIdx(instData.SDST, num_scalar_regs);
|
||||
default:
|
||||
fatal("Operand at idx %i does not exist\n", opIdx);
|
||||
return -1;
|
||||
@@ -1871,7 +1839,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_DS::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_DS::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -1993,7 +1961,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_MUBUF::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_MUBUF::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
@@ -2170,7 +2138,7 @@ namespace Gcn3ISA
|
||||
}
|
||||
|
||||
int
|
||||
Inst_FLAT::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst)
|
||||
Inst_FLAT::getRegisterIndex(int opIdx, int num_scalar_regs)
|
||||
{
|
||||
assert(opIdx >= 0);
|
||||
assert(opIdx < getNumOperands());
|
||||
|
||||
@@ -79,7 +79,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -103,7 +103,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -127,7 +127,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -151,7 +151,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -175,7 +175,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -193,7 +193,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
/**
|
||||
@@ -276,7 +276,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -300,7 +300,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -324,7 +324,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -361,7 +361,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -395,7 +395,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
// first instruction DWORD
|
||||
@@ -418,7 +418,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
template<typename T>
|
||||
@@ -518,7 +518,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
template<typename T>
|
||||
@@ -649,7 +649,7 @@ namespace Gcn3ISA
|
||||
* non-formatted accesses, this is done on a per-lane
|
||||
* basis.
|
||||
*/
|
||||
if (stride == 0 || !rsrc_desc.swizzleEn) {
|
||||
if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) {
|
||||
if (buf_off + stride * buf_idx >=
|
||||
rsrc_desc.numRecords - s_offset.rawData()) {
|
||||
DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
|
||||
@@ -657,13 +657,13 @@ namespace Gcn3ISA
|
||||
"const_stride = %llx, "
|
||||
"const_num_records = %llx\n",
|
||||
lane, buf_off + stride * buf_idx,
|
||||
stride, rsrc_desc.numRecords);
|
||||
rsrc_desc.stride, rsrc_desc.numRecords);
|
||||
oobMask.set(lane);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (stride != 0 && rsrc_desc.swizzleEn) {
|
||||
if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) {
|
||||
if (buf_idx >= rsrc_desc.numRecords ||
|
||||
buf_off >= stride) {
|
||||
DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
|
||||
@@ -776,7 +776,7 @@ namespace Gcn3ISA
|
||||
|
||||
bool isScalarRegister(int opIdx) override;
|
||||
bool isVectorRegister(int opIdx) override;
|
||||
int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override;
|
||||
int getRegisterIndex(int opIdx, int num_scalar_regs) override;
|
||||
|
||||
protected:
|
||||
template<typename T>
|
||||
|
||||
@@ -557,7 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
|
||||
@@ -598,7 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst()
|
||||
wavefront, gpu_static_inst,
|
||||
wavefront->computeUnit->
|
||||
getAndIncSeqNum());
|
||||
gpu_dyn_inst->initOperandInfo(gpu_dyn_inst);
|
||||
gpu_dyn_inst->initOperandInfo();
|
||||
wavefront->instructionBuffer.push_back(gpu_dyn_inst);
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
|
||||
|
||||
@@ -85,9 +85,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf,
|
||||
}
|
||||
|
||||
void
|
||||
GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
GPUDynInst::initOperandInfo()
|
||||
{
|
||||
assert(gpu_dyn_inst->wavefront());
|
||||
/**
|
||||
* Generate and cache the operand to register mapping information. This
|
||||
* prevents this info from being generated multiple times throughout
|
||||
@@ -102,7 +101,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
int op_num_dwords(-1);
|
||||
|
||||
if (isVectorRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
@@ -143,7 +142,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst)
|
||||
phys_indices);
|
||||
}
|
||||
} else if (isScalarRegister(op_idx)) {
|
||||
virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst);
|
||||
virt_idx = getRegisterIndex(op_idx);
|
||||
op_num_dwords = numOpdDWORDs(op_idx);
|
||||
|
||||
if (isSrcOperand(op_idx)) {
|
||||
@@ -287,9 +286,9 @@ GPUDynInst::isScalarRegister(int operandIdx)
|
||||
}
|
||||
|
||||
int
|
||||
GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst)
|
||||
GPUDynInst::getRegisterIndex(int operandIdx)
|
||||
{
|
||||
return _staticInst->getRegisterIndex(operandIdx, gpuDynInst);
|
||||
return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs);
|
||||
}
|
||||
|
||||
int
|
||||
|
||||
@@ -117,7 +117,7 @@ class GPUDynInst : public GPUExecContext
|
||||
GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst,
|
||||
uint64_t instSeqNum);
|
||||
~GPUDynInst();
|
||||
void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst);
|
||||
void initOperandInfo();
|
||||
void execute(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
const std::vector<RegisterOperandInfo>&
|
||||
@@ -159,7 +159,7 @@ class GPUDynInst : public GPUExecContext
|
||||
int getNumOperands();
|
||||
bool isVectorRegister(int operandIdx);
|
||||
bool isScalarRegister(int operandIdx);
|
||||
int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst);
|
||||
int getRegisterIndex(int operandIdx);
|
||||
int getOperandSize(int operandIdx);
|
||||
bool isDstOperand(int operandIdx);
|
||||
bool isSrcOperand(int operandIdx);
|
||||
|
||||
@@ -86,8 +86,7 @@ class GPUStaticInst : public GPUStaticInstFlags
|
||||
virtual bool isExecMaskRegister(int opIdx) = 0;
|
||||
virtual int getOperandSize(int operandIndex) = 0;
|
||||
|
||||
virtual int getRegisterIndex(int operandIndex,
|
||||
GPUDynInstPtr gpuDynInst) = 0;
|
||||
virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0;
|
||||
|
||||
virtual int numDstRegOperands() = 0;
|
||||
virtual int numSrcRegOperands() = 0;
|
||||
@@ -310,7 +309,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
|
||||
int getOperandSize(int operandIndex) override { return 0; }
|
||||
|
||||
int
|
||||
getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override
|
||||
getRegisterIndex(int operandIndex, int num_scalar_regs) override
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -53,7 +53,7 @@ ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -84,7 +84,7 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -116,7 +116,7 @@ ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
@@ -139,7 +139,7 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w,
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isScalarRegister(i) && ii->isDstOperand(i)) {
|
||||
|
||||
int sgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int sgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const
|
||||
{
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
|
||||
// determine number of registers
|
||||
int nRegs =
|
||||
@@ -89,7 +89,7 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii)
|
||||
// iterate over all register destination operands
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
@@ -150,7 +150,7 @@ VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii)
|
||||
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1
|
||||
: ii->getOperandSize(i) / 4;
|
||||
for (int j = 0; j < nRegs; j++) {
|
||||
@@ -183,7 +183,7 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad(
|
||||
assert(ii->isLoad() || ii->isAtomicRet());
|
||||
for (int i = 0; i < ii->getNumOperands(); ++i) {
|
||||
if (ii->isVectorRegister(i) && ii->isDstOperand(i)) {
|
||||
int vgprIdx = ii->getRegisterIndex(i, ii);
|
||||
int vgprIdx = ii->getRegisterIndex(i);
|
||||
int nRegs = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
|
||||
|
||||
@@ -927,7 +927,7 @@ Wavefront::exec()
|
||||
// vector RAW dependency tracking
|
||||
for (int i = 0; i < ii->getNumOperands(); i++) {
|
||||
if (ii->isVectorRegister(i)) {
|
||||
int vgpr = ii->getRegisterIndex(i, ii);
|
||||
int vgpr = ii->getRegisterIndex(i);
|
||||
int nReg = ii->getOperandSize(i) <= 4 ? 1 :
|
||||
ii->getOperandSize(i) / 4;
|
||||
for (int n = 0; n < nReg; n++) {
|
||||
|
||||
Reference in New Issue
Block a user