From 0e2564a6292505e4dc65cb180085165fa03c98ce Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Thu, 7 Mar 2019 19:26:10 -0500 Subject: [PATCH] arch-gcn3, gpu-compute: Update getRegisterIndex() API This change removes the GPUDynInstPtr argument from getRegisterIndex(). The dynamic inst was only needed to get access to its parent WF's state so it could determine the number of scalar registers the wave was allocated. However, we can simply pass the number of scalar registers directly. This cuts down on shared pointer usage. Change-Id: I29ab8d9a3de1f8b82b820ef421fc653284567c65 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42210 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh | 2 +- src/arch/amdgpu/gcn3/insts/op_encodings.cc | 128 +++++++----------- src/arch/amdgpu/gcn3/insts/op_encodings.hh | 34 ++--- src/gpu-compute/fetch_unit.cc | 4 +- src/gpu-compute/gpu_dyn_inst.cc | 11 +- src/gpu-compute/gpu_dyn_inst.hh | 4 +- src/gpu-compute/gpu_static_inst.hh | 5 +- src/gpu-compute/scalar_register_file.cc | 8 +- src/gpu-compute/vector_register_file.cc | 8 +- src/gpu-compute/wavefront.cc | 2 +- 10 files changed, 86 insertions(+), 120 deletions(-) diff --git a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh index 03beb20b94..e4983e8341 100644 --- a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh +++ b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh @@ -70,7 +70,7 @@ namespace Gcn3ISA int getOperandSize(int opIdx) override { return 0; } int - getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override + getRegisterIndex(int opIdx, int num_scalar_regs) override { return 0; } diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc index a6a3a26fdf..34bd35f893 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.cc +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.cc @@ -128,21 +128,18 @@ namespace Gcn3ISA } int - Inst_SOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOP2::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SSRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs); case 2: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -244,7 +241,7 @@ namespace Gcn3ISA } int - Inst_SOPK::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPK::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -253,8 +250,7 @@ namespace Gcn3ISA case 0: return -1; case 1: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -349,7 +345,7 @@ namespace Gcn3ISA } int - Inst_SOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOP1::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -359,14 +355,11 @@ namespace Gcn3ISA if (instData.OP == 0x1C) { // Special case for s_getpc, which has no source reg. // Instead, it implicitly reads the PC. - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -467,18 +460,16 @@ namespace Gcn3ISA } int - Inst_SOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SSRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -583,7 +574,7 @@ namespace Gcn3ISA } int - Inst_SOPP::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPP::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -691,7 +682,7 @@ namespace Gcn3ISA } int - Inst_SMEM::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SMEM::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -700,8 +691,7 @@ namespace Gcn3ISA case 0: // SBASE has an implied LSB of 0, so we need // to shift by one to get the actual value - return opSelectorToRegIdx(instData.SBASE << 1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SBASE << 1, num_scalar_regs); case 1: if (instData.IMM) { // operand is an immediate value, not a register @@ -710,8 +700,7 @@ namespace Gcn3ISA return extData.OFFSET; } case 2: - return opSelectorToRegIdx(instData.SDATA, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDATA, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -901,21 +890,19 @@ namespace Gcn3ISA } int - Inst_VOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP2::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VSRC1; case 2: if (readsVCC()) { - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } else { return instData.VDST; } @@ -924,13 +911,11 @@ namespace Gcn3ISA if (readsVCC()) { return instData.VDST; } else { - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 4: assert(writesVCC() && readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1044,15 +1029,14 @@ namespace Gcn3ISA } int - Inst_VOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP1::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VDST; default: @@ -1066,6 +1050,7 @@ namespace Gcn3ISA Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode) : GCN3GPUStaticInst(opcode) { + setFlag(WritesVCC); // copy first instruction DWORD instData = iFmt[0]; if (hasSecondDword(iFmt)) { @@ -1167,21 +1152,19 @@ namespace Gcn3ISA } int - Inst_VOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOPC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VSRC1; case 2: // VCC - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1437,7 +1420,7 @@ namespace Gcn3ISA } int - Inst_VOP3::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP3::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1445,20 +1428,17 @@ namespace Gcn3ISA switch (opIdx) { case 0: // SRC0 - return opSelectorToRegIdx(extData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC0, num_scalar_regs); case 1: if (numSrcRegOperands() > 1) { // if we have more than 1 source operand then // op index 1 corresponds to SRC1 - return opSelectorToRegIdx(extData.SRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC1, num_scalar_regs); } else { // if we only have 1 source operand, opIdx 1 // will be VDST if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } @@ -1468,15 +1448,13 @@ namespace Gcn3ISA // op index 2 corresponds to SRC2. SRC2 may be // a scalar or vector register, an inline // constant, or a special HW register - return opSelectorToRegIdx(extData.SRC2, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC2, num_scalar_regs); } else if (numSrcRegOperands() == 2) { // if we only have 2 source operands, opIdx 2 // will be VDST, and VDST is always a vector // reg if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } else { @@ -1484,31 +1462,27 @@ namespace Gcn3ISA // VDST then it must be a VCC read or write, // and VCC is never stored in a VGPR assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 3: if (numSrcRegOperands() == 3) { // if we have 3 source operands then op // idx 3 will correspond to VDST if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } else { // if this idx doesn't correspond to VDST // then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 4: // if a VOP3 instruction has more than 4 ops // it must read from and write to VCC assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1731,7 +1705,7 @@ namespace Gcn3ISA } int - Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1739,14 +1713,12 @@ namespace Gcn3ISA switch (opIdx) { case 0: // SRC0 - return opSelectorToRegIdx(extData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC0, num_scalar_regs); case 1: if (numSrcRegOperands() > 1) { // if we have more than 1 source operand then // op index 1 corresponds to SRC1 - return opSelectorToRegIdx(extData.SRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC1, num_scalar_regs); } else { // if we only have 1 source operand, opIdx 1 // will be VDST @@ -1756,8 +1728,7 @@ namespace Gcn3ISA if (numSrcRegOperands() > 2) { // if we have more than 2 source operand then // op index 2 corresponds to SRC2 - return opSelectorToRegIdx(extData.SRC2, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC2, num_scalar_regs); } else if (numSrcRegOperands() == 2) { // if we only have 2 source operands, opIdx 2 // will be VDST @@ -1766,8 +1737,7 @@ namespace Gcn3ISA // if this idx doesn't correspond to SRCX or // VDST then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } case 3: if (numSrcRegOperands() == 3) { @@ -1778,15 +1748,13 @@ namespace Gcn3ISA // if this idx doesn't correspond to VDST // then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } case 4: // if a VOP3 instruction has more than 4 ops // it must read from and write to VCC assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1871,7 +1839,7 @@ namespace Gcn3ISA } int - Inst_DS::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_DS::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1993,7 +1961,7 @@ namespace Gcn3ISA } int - Inst_MUBUF::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_MUBUF::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -2170,7 +2138,7 @@ namespace Gcn3ISA } int - Inst_FLAT::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_FLAT::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh index 0957a7dda9..c2a417ac0a 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.hh +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.hh @@ -79,7 +79,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -103,7 +103,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -127,7 +127,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -151,7 +151,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -175,7 +175,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -193,7 +193,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: /** @@ -276,7 +276,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -300,7 +300,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -324,7 +324,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -361,7 +361,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -395,7 +395,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -418,7 +418,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template @@ -518,7 +518,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template @@ -649,7 +649,7 @@ namespace Gcn3ISA * non-formatted accesses, this is done on a per-lane * basis. */ - if (stride == 0 || !rsrc_desc.swizzleEn) { + if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) { if (buf_off + stride * buf_idx >= rsrc_desc.numRecords - s_offset.rawData()) { DPRINTF(GCN3, "mubuf out-of-bounds condition 1: " @@ -657,13 +657,13 @@ namespace Gcn3ISA "const_stride = %llx, " "const_num_records = %llx\n", lane, buf_off + stride * buf_idx, - stride, rsrc_desc.numRecords); + rsrc_desc.stride, rsrc_desc.numRecords); oobMask.set(lane); continue; } } - if (stride != 0 && rsrc_desc.swizzleEn) { + if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) { if (buf_idx >= rsrc_desc.numRecords || buf_off >= stride) { DPRINTF(GCN3, "mubuf out-of-bounds condition 2: " @@ -776,7 +776,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index d2af7b3519..2664c3bebe 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -557,7 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts() wavefront, gpu_static_inst, wavefront->computeUnit-> getAndIncSeqNum()); - gpu_dyn_inst->initOperandInfo(gpu_dyn_inst); + gpu_dyn_inst->initOperandInfo(); wavefront->instructionBuffer.push_back(gpu_dyn_inst); DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). " @@ -598,7 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst() wavefront, gpu_static_inst, wavefront->computeUnit-> getAndIncSeqNum()); - gpu_dyn_inst->initOperandInfo(gpu_dyn_inst); + gpu_dyn_inst->initOperandInfo(); wavefront->instructionBuffer.push_back(gpu_dyn_inst); DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) " diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index c08e4b9c6b..2dc1dcf532 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -85,9 +85,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, } void -GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) +GPUDynInst::initOperandInfo() { - assert(gpu_dyn_inst->wavefront()); /** * Generate and cache the operand to register mapping information. This * prevents this info from being generated multiple times throughout @@ -102,7 +101,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) int op_num_dwords(-1); if (isVectorRegister(op_idx)) { - virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst); + virt_idx = getRegisterIndex(op_idx); op_num_dwords = numOpdDWORDs(op_idx); if (isSrcOperand(op_idx)) { @@ -143,7 +142,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) phys_indices); } } else if (isScalarRegister(op_idx)) { - virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst); + virt_idx = getRegisterIndex(op_idx); op_num_dwords = numOpdDWORDs(op_idx); if (isSrcOperand(op_idx)) { @@ -287,9 +286,9 @@ GPUDynInst::isScalarRegister(int operandIdx) } int -GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst) +GPUDynInst::getRegisterIndex(int operandIdx) { - return _staticInst->getRegisterIndex(operandIdx, gpuDynInst); + return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs); } int diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index e38a87f6e7..b2635bcf0c 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -117,7 +117,7 @@ class GPUDynInst : public GPUExecContext GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum); ~GPUDynInst(); - void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst); + void initOperandInfo(); void execute(GPUDynInstPtr gpuDynInst); const std::vector& @@ -159,7 +159,7 @@ class GPUDynInst : public GPUExecContext int getNumOperands(); bool isVectorRegister(int operandIdx); bool isScalarRegister(int operandIdx); - int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst); + int getRegisterIndex(int operandIdx); int getOperandSize(int operandIdx); bool isDstOperand(int operandIdx); bool isSrcOperand(int operandIdx); diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index f973f2f430..cdd342e790 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -86,8 +86,7 @@ class GPUStaticInst : public GPUStaticInstFlags virtual bool isExecMaskRegister(int opIdx) = 0; virtual int getOperandSize(int operandIndex) = 0; - virtual int getRegisterIndex(int operandIndex, - GPUDynInstPtr gpuDynInst) = 0; + virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0; virtual int numDstRegOperands() = 0; virtual int numSrcRegOperands() = 0; @@ -310,7 +309,7 @@ class KernelLaunchStaticInst : public GPUStaticInst int getOperandSize(int operandIndex) override { return 0; } int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + getRegisterIndex(int operandIndex, int num_scalar_regs) override { return 0; } diff --git a/src/gpu-compute/scalar_register_file.cc b/src/gpu-compute/scalar_register_file.cc index 8068ff8239..ba6ac57477 100644 --- a/src/gpu-compute/scalar_register_file.cc +++ b/src/gpu-compute/scalar_register_file.cc @@ -53,7 +53,7 @@ ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -84,7 +84,7 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -116,7 +116,7 @@ ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int j = 0; j < nRegs; j++) { @@ -139,7 +139,7 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w, for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc index 40ce281f69..f162279a25 100644 --- a/src/gpu-compute/vector_register_file.cc +++ b/src/gpu-compute/vector_register_file.cc @@ -59,7 +59,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const { for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); // determine number of registers int nRegs = @@ -89,7 +89,7 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) // iterate over all register destination operands for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -150,7 +150,7 @@ VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int j = 0; j < nRegs; j++) { @@ -183,7 +183,7 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad( assert(ii->isLoad() || ii->isAtomicRet()); for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 279aac7c18..0cb667598e 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -927,7 +927,7 @@ Wavefront::exec() // vector RAW dependency tracking for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isVectorRegister(i)) { - int vgpr = ii->getRegisterIndex(i, ii); + int vgpr = ii->getRegisterIndex(i); int nReg = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int n = 0; n < nReg; n++) {