diff --git a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh index 03beb20b94..e4983e8341 100644 --- a/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh +++ b/src/arch/amdgpu/gcn3/insts/gpu_static_inst.hh @@ -70,7 +70,7 @@ namespace Gcn3ISA int getOperandSize(int opIdx) override { return 0; } int - getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override + getRegisterIndex(int opIdx, int num_scalar_regs) override { return 0; } diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.cc b/src/arch/amdgpu/gcn3/insts/op_encodings.cc index a6a3a26fdf..34bd35f893 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.cc +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.cc @@ -128,21 +128,18 @@ namespace Gcn3ISA } int - Inst_SOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOP2::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SSRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs); case 2: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -244,7 +241,7 @@ namespace Gcn3ISA } int - Inst_SOPK::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPK::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -253,8 +250,7 @@ namespace Gcn3ISA case 0: return -1; case 1: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -349,7 +345,7 @@ namespace Gcn3ISA } int - Inst_SOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOP1::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -359,14 +355,11 @@ namespace Gcn3ISA if (instData.OP == 0x1C) { // Special case for s_getpc, which has no source reg. // Instead, it implicitly reads the PC. - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -467,18 +460,16 @@ namespace Gcn3ISA } int - Inst_SOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SSRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC0, num_scalar_regs); case 1: - return opSelectorToRegIdx(instData.SSRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SSRC1, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -583,7 +574,7 @@ namespace Gcn3ISA } int - Inst_SOPP::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SOPP::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -691,7 +682,7 @@ namespace Gcn3ISA } int - Inst_SMEM::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_SMEM::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -700,8 +691,7 @@ namespace Gcn3ISA case 0: // SBASE has an implied LSB of 0, so we need // to shift by one to get the actual value - return opSelectorToRegIdx(instData.SBASE << 1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SBASE << 1, num_scalar_regs); case 1: if (instData.IMM) { // operand is an immediate value, not a register @@ -710,8 +700,7 @@ namespace Gcn3ISA return extData.OFFSET; } case 2: - return opSelectorToRegIdx(instData.SDATA, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDATA, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -901,21 +890,19 @@ namespace Gcn3ISA } int - Inst_VOP2::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP2::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VSRC1; case 2: if (readsVCC()) { - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } else { return instData.VDST; } @@ -924,13 +911,11 @@ namespace Gcn3ISA if (readsVCC()) { return instData.VDST; } else { - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 4: assert(writesVCC() && readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1044,15 +1029,14 @@ namespace Gcn3ISA } int - Inst_VOP1::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP1::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VDST; default: @@ -1066,6 +1050,7 @@ namespace Gcn3ISA Inst_VOPC::Inst_VOPC(InFmt_VOPC *iFmt, const std::string &opcode) : GCN3GPUStaticInst(opcode) { + setFlag(WritesVCC); // copy first instruction DWORD instData = iFmt[0]; if (hasSecondDword(iFmt)) { @@ -1167,21 +1152,19 @@ namespace Gcn3ISA } int - Inst_VOPC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOPC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); switch (opIdx) { case 0: - return opSelectorToRegIdx(instData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SRC0, num_scalar_regs); case 1: return instData.VSRC1; case 2: // VCC - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1437,7 +1420,7 @@ namespace Gcn3ISA } int - Inst_VOP3::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP3::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1445,20 +1428,17 @@ namespace Gcn3ISA switch (opIdx) { case 0: // SRC0 - return opSelectorToRegIdx(extData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC0, num_scalar_regs); case 1: if (numSrcRegOperands() > 1) { // if we have more than 1 source operand then // op index 1 corresponds to SRC1 - return opSelectorToRegIdx(extData.SRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC1, num_scalar_regs); } else { // if we only have 1 source operand, opIdx 1 // will be VDST if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } @@ -1468,15 +1448,13 @@ namespace Gcn3ISA // op index 2 corresponds to SRC2. SRC2 may be // a scalar or vector register, an inline // constant, or a special HW register - return opSelectorToRegIdx(extData.SRC2, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC2, num_scalar_regs); } else if (numSrcRegOperands() == 2) { // if we only have 2 source operands, opIdx 2 // will be VDST, and VDST is always a vector // reg if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } else { @@ -1484,31 +1462,27 @@ namespace Gcn3ISA // VDST then it must be a VCC read or write, // and VCC is never stored in a VGPR assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 3: if (numSrcRegOperands() == 3) { // if we have 3 source operands then op // idx 3 will correspond to VDST if (sgprDst) { - return opSelectorToRegIdx(instData.VDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.VDST, num_scalar_regs); } return instData.VDST; } else { // if this idx doesn't correspond to VDST // then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); } case 4: // if a VOP3 instruction has more than 4 ops // it must read from and write to VCC assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(REG_VCC_LO, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(REG_VCC_LO, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1731,7 +1705,7 @@ namespace Gcn3ISA } int - Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_VOP3_SDST_ENC::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1739,14 +1713,12 @@ namespace Gcn3ISA switch (opIdx) { case 0: // SRC0 - return opSelectorToRegIdx(extData.SRC0, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC0, num_scalar_regs); case 1: if (numSrcRegOperands() > 1) { // if we have more than 1 source operand then // op index 1 corresponds to SRC1 - return opSelectorToRegIdx(extData.SRC1, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC1, num_scalar_regs); } else { // if we only have 1 source operand, opIdx 1 // will be VDST @@ -1756,8 +1728,7 @@ namespace Gcn3ISA if (numSrcRegOperands() > 2) { // if we have more than 2 source operand then // op index 2 corresponds to SRC2 - return opSelectorToRegIdx(extData.SRC2, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(extData.SRC2, num_scalar_regs); } else if (numSrcRegOperands() == 2) { // if we only have 2 source operands, opIdx 2 // will be VDST @@ -1766,8 +1737,7 @@ namespace Gcn3ISA // if this idx doesn't correspond to SRCX or // VDST then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } case 3: if (numSrcRegOperands() == 3) { @@ -1778,15 +1748,13 @@ namespace Gcn3ISA // if this idx doesn't correspond to VDST // then it must be a VCC read or write assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); } case 4: // if a VOP3 instruction has more than 4 ops // it must read from and write to VCC assert(writesVCC() || readsVCC()); - return opSelectorToRegIdx(instData.SDST, - gpuDynInst->wavefront()->reservedScalarRegs); + return opSelectorToRegIdx(instData.SDST, num_scalar_regs); default: fatal("Operand at idx %i does not exist\n", opIdx); return -1; @@ -1871,7 +1839,7 @@ namespace Gcn3ISA } int - Inst_DS::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_DS::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -1993,7 +1961,7 @@ namespace Gcn3ISA } int - Inst_MUBUF::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_MUBUF::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); @@ -2170,7 +2138,7 @@ namespace Gcn3ISA } int - Inst_FLAT::getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) + Inst_FLAT::getRegisterIndex(int opIdx, int num_scalar_regs) { assert(opIdx >= 0); assert(opIdx < getNumOperands()); diff --git a/src/arch/amdgpu/gcn3/insts/op_encodings.hh b/src/arch/amdgpu/gcn3/insts/op_encodings.hh index 0957a7dda9..c2a417ac0a 100644 --- a/src/arch/amdgpu/gcn3/insts/op_encodings.hh +++ b/src/arch/amdgpu/gcn3/insts/op_encodings.hh @@ -79,7 +79,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -103,7 +103,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -127,7 +127,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -151,7 +151,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -175,7 +175,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -193,7 +193,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: /** @@ -276,7 +276,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -300,7 +300,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -324,7 +324,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -361,7 +361,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -395,7 +395,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: // first instruction DWORD @@ -418,7 +418,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template @@ -518,7 +518,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template @@ -649,7 +649,7 @@ namespace Gcn3ISA * non-formatted accesses, this is done on a per-lane * basis. */ - if (stride == 0 || !rsrc_desc.swizzleEn) { + if (rsrc_desc.stride == 0 || !rsrc_desc.swizzleEn) { if (buf_off + stride * buf_idx >= rsrc_desc.numRecords - s_offset.rawData()) { DPRINTF(GCN3, "mubuf out-of-bounds condition 1: " @@ -657,13 +657,13 @@ namespace Gcn3ISA "const_stride = %llx, " "const_num_records = %llx\n", lane, buf_off + stride * buf_idx, - stride, rsrc_desc.numRecords); + rsrc_desc.stride, rsrc_desc.numRecords); oobMask.set(lane); continue; } } - if (stride != 0 && rsrc_desc.swizzleEn) { + if (rsrc_desc.stride != 0 && rsrc_desc.swizzleEn) { if (buf_idx >= rsrc_desc.numRecords || buf_off >= stride) { DPRINTF(GCN3, "mubuf out-of-bounds condition 2: " @@ -776,7 +776,7 @@ namespace Gcn3ISA bool isScalarRegister(int opIdx) override; bool isVectorRegister(int opIdx) override; - int getRegisterIndex(int opIdx, GPUDynInstPtr gpuDynInst) override; + int getRegisterIndex(int opIdx, int num_scalar_regs) override; protected: template diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index d2af7b3519..2664c3bebe 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -557,7 +557,7 @@ FetchUnit::FetchBufDesc::decodeInsts() wavefront, gpu_static_inst, wavefront->computeUnit-> getAndIncSeqNum()); - gpu_dyn_inst->initOperandInfo(gpu_dyn_inst); + gpu_dyn_inst->initOperandInfo(); wavefront->instructionBuffer.push_back(gpu_dyn_inst); DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). " @@ -598,7 +598,7 @@ FetchUnit::FetchBufDesc::decodeSplitInst() wavefront, gpu_static_inst, wavefront->computeUnit-> getAndIncSeqNum()); - gpu_dyn_inst->initOperandInfo(gpu_dyn_inst); + gpu_dyn_inst->initOperandInfo(); wavefront->instructionBuffer.push_back(gpu_dyn_inst); DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) " diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index c08e4b9c6b..2dc1dcf532 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -85,9 +85,8 @@ GPUDynInst::GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, } void -GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) +GPUDynInst::initOperandInfo() { - assert(gpu_dyn_inst->wavefront()); /** * Generate and cache the operand to register mapping information. This * prevents this info from being generated multiple times throughout @@ -102,7 +101,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) int op_num_dwords(-1); if (isVectorRegister(op_idx)) { - virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst); + virt_idx = getRegisterIndex(op_idx); op_num_dwords = numOpdDWORDs(op_idx); if (isSrcOperand(op_idx)) { @@ -143,7 +142,7 @@ GPUDynInst::initOperandInfo(GPUDynInstPtr &gpu_dyn_inst) phys_indices); } } else if (isScalarRegister(op_idx)) { - virt_idx = getRegisterIndex(op_idx, gpu_dyn_inst); + virt_idx = getRegisterIndex(op_idx); op_num_dwords = numOpdDWORDs(op_idx); if (isSrcOperand(op_idx)) { @@ -287,9 +286,9 @@ GPUDynInst::isScalarRegister(int operandIdx) } int -GPUDynInst::getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst) +GPUDynInst::getRegisterIndex(int operandIdx) { - return _staticInst->getRegisterIndex(operandIdx, gpuDynInst); + return _staticInst->getRegisterIndex(operandIdx, wf->reservedScalarRegs); } int diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index e38a87f6e7..b2635bcf0c 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -117,7 +117,7 @@ class GPUDynInst : public GPUExecContext GPUDynInst(ComputeUnit *_cu, Wavefront *_wf, GPUStaticInst *static_inst, uint64_t instSeqNum); ~GPUDynInst(); - void initOperandInfo(GPUDynInstPtr &gpu_dyn_inst); + void initOperandInfo(); void execute(GPUDynInstPtr gpuDynInst); const std::vector& @@ -159,7 +159,7 @@ class GPUDynInst : public GPUExecContext int getNumOperands(); bool isVectorRegister(int operandIdx); bool isScalarRegister(int operandIdx); - int getRegisterIndex(int operandIdx, GPUDynInstPtr gpuDynInst); + int getRegisterIndex(int operandIdx); int getOperandSize(int operandIdx); bool isDstOperand(int operandIdx); bool isSrcOperand(int operandIdx); diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index f973f2f430..cdd342e790 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -86,8 +86,7 @@ class GPUStaticInst : public GPUStaticInstFlags virtual bool isExecMaskRegister(int opIdx) = 0; virtual int getOperandSize(int operandIndex) = 0; - virtual int getRegisterIndex(int operandIndex, - GPUDynInstPtr gpuDynInst) = 0; + virtual int getRegisterIndex(int operandIndex, int num_scalar_regs) = 0; virtual int numDstRegOperands() = 0; virtual int numSrcRegOperands() = 0; @@ -310,7 +309,7 @@ class KernelLaunchStaticInst : public GPUStaticInst int getOperandSize(int operandIndex) override { return 0; } int - getRegisterIndex(int operandIndex, GPUDynInstPtr gpuDynInst) override + getRegisterIndex(int operandIndex, int num_scalar_regs) override { return 0; } diff --git a/src/gpu-compute/scalar_register_file.cc b/src/gpu-compute/scalar_register_file.cc index 8068ff8239..ba6ac57477 100644 --- a/src/gpu-compute/scalar_register_file.cc +++ b/src/gpu-compute/scalar_register_file.cc @@ -53,7 +53,7 @@ ScalarRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isSrcOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -84,7 +84,7 @@ ScalarRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -116,7 +116,7 @@ ScalarRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int j = 0; j < nRegs; j++) { @@ -139,7 +139,7 @@ ScalarRegisterFile::scheduleWriteOperandsFromLoad(Wavefront *w, for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isScalarRegister(i) && ii->isDstOperand(i)) { - int sgprIdx = ii->getRegisterIndex(i, ii); + int sgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; diff --git a/src/gpu-compute/vector_register_file.cc b/src/gpu-compute/vector_register_file.cc index 40ce281f69..f162279a25 100644 --- a/src/gpu-compute/vector_register_file.cc +++ b/src/gpu-compute/vector_register_file.cc @@ -59,7 +59,7 @@ VectorRegisterFile::operandsReady(Wavefront *w, GPUDynInstPtr ii) const { for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isSrcOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); // determine number of registers int nRegs = @@ -89,7 +89,7 @@ VectorRegisterFile::scheduleWriteOperands(Wavefront *w, GPUDynInstPtr ii) // iterate over all register destination operands for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; @@ -150,7 +150,7 @@ VectorRegisterFile::waveExecuteInst(Wavefront *w, GPUDynInstPtr ii) for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int j = 0; j < nRegs; j++) { @@ -183,7 +183,7 @@ VectorRegisterFile::scheduleWriteOperandsFromLoad( assert(ii->isLoad() || ii->isAtomicRet()); for (int i = 0; i < ii->getNumOperands(); ++i) { if (ii->isVectorRegister(i) && ii->isDstOperand(i)) { - int vgprIdx = ii->getRegisterIndex(i, ii); + int vgprIdx = ii->getRegisterIndex(i); int nRegs = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 279aac7c18..0cb667598e 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -927,7 +927,7 @@ Wavefront::exec() // vector RAW dependency tracking for (int i = 0; i < ii->getNumOperands(); i++) { if (ii->isVectorRegister(i)) { - int vgpr = ii->getRegisterIndex(i, ii); + int vgpr = ii->getRegisterIndex(i); int nReg = ii->getOperandSize(i) <= 4 ? 1 : ii->getOperandSize(i) / 4; for (int n = 0; n < nReg; n++) {