diff --git a/src/arch/amdgpu/vega/gpu_decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc index 0f4b1e9872..e07a392ced 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.cc +++ b/src/arch/amdgpu/vega/gpu_decoder.cc @@ -3144,7 +3144,7 @@ namespace VegaISA &Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16, &Decoder::decode_invalid, &Decoder::decode_OP_VOP1__V_SWAP_B32, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, @@ -8622,7 +8622,6 @@ namespace VegaISA Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORD(MachInst iFmt) { return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT); - return nullptr; } GPUStaticInst* @@ -9898,29 +9897,25 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_UBYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SBYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_USHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SSHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -9950,8 +9945,7 @@ namespace VegaISA GPUStaticInst* Decoder::decode_OP_SCRATCH__SCRATCH_STORE_BYTE(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -9964,8 +9958,7 @@ namespace VegaISA GPUStaticInst* 
Decoder::decode_OP_SCRATCH__SCRATCH_STORE_SHORT(MachInst iFmt) { - fatal("Trying to decode instruction without a class\n"); - return nullptr; + return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT); } GPUStaticInst* @@ -11784,6 +11777,12 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst iFmt) + { + return new Inst_VOP1__V_ACCVGPR_MOV_B32(&iFmt->iFmt_VOP1); + } + GPUStaticInst* Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt) { diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index 8094233bd8..2523734ce5 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -1314,6 +1314,7 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP1__V_CVT_NORM_U16_F16(MachInst); GPUStaticInst* decode_OP_VOP1__V_SAT_PK_U8_I16(MachInst); GPUStaticInst* decode_OP_VOP1__V_SWAP_B32(MachInst); + GPUStaticInst* decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst); GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst); GPUStaticInst* decode_OP_VOP2__V_ADD_F32(MachInst); GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst); diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index 9d91526f3f..4e71f13ad4 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -10562,6 +10562,38 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP1__V_LOG_LEGACY_F32 + class Inst_VOP1__V_ACCVGPR_MOV_B32 : public Inst_VOP1 + { + public: + Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1*); + ~Inst_VOP1__V_ACCVGPR_MOV_B32(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src + return 4; + case 1: //vdst + return 4; + default: + 
fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP1__V_ACCVGPR_MOV_B32 + class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC { public: diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index 3bbf1e0085..f970923951 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -2397,6 +2397,38 @@ namespace VegaISA } } + vdst.write(); + } // execute + // --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods --- + + Inst_VOP1__V_ACCVGPR_MOV_B32:: + Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_accvgpr_mov_b32") + { + setFlag(ALU); + } // Inst_VOP1__V_ACCVGPR_MOV_B32 + + Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32() + { + } // ~Inst_VOP1__V_ACCVGPR_MOV_B32 + + void + Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst) + { + Wavefront *wf = gpuDynInst->wavefront(); + unsigned accum_offset = wf->accumOffset; + + ConstVecOperandU32 src(gpuDynInst, instData.SRC0+accum_offset); + VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset); + + src.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { + vdst[lane] = src[lane]; + } + } + vdst.write(); } // execute } // namespace VegaISA diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh index 593f0e34fd..1bb9b43d1f 100644 --- a/src/arch/amdgpu/vega/operand.hh +++ b/src/arch/amdgpu/vega/operand.hh @@ -579,8 +579,30 @@ namespace VegaISA case REG_SRC_SWDA: case REG_SRC_DPP: case REG_SRC_LITERAL: - assert(NumDwords == 1); + /** + * From the Vega specification: + * When a literal constant is used with a 64 bit instruction, + * the literal is expanded to 64 bits by: padding the LSBs + * with zeros for floats, padding the MSBs with zeros for + * unsigned ints, and by sign-extending signed ints. 
+ */ srfData[0] = _gpuDynInst->srcLiteral(); + if constexpr (NumDwords == 2) { + if constexpr (std::is_integral_v<DataType>) { + if constexpr (std::is_signed_v<DataType>) { + if (bits(srfData[0], 31, 31) == 1) { + srfData[1] = 0xffffffff; + } else { + srfData[1] = 0; + } + } else { + srfData[1] = 0; + } + } else { + srfData[1] = _gpuDynInst->srcLiteral(); + srfData[0] = 0; + } + } break; case REG_SHARED_BASE: {