From a062229ac3497e0068a62533cf55eb1079313d63 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Tue, 14 May 2024 08:12:41 -0700 Subject: [PATCH] arch-vega: Implement v_mov_b64 This instruction is new in MI300 and is used in some of the example applications used to test MFMAs. Change-Id: I739f8ab2be6a93ee3b6bdc4120d0117724edb0d4 --- src/arch/amdgpu/vega/gpu_decoder.cc | 8 ++++- src/arch/amdgpu/vega/gpu_decoder.hh | 1 + src/arch/amdgpu/vega/insts/instructions.hh | 32 ++++++++++++++++++++ src/arch/amdgpu/vega/insts/vop1.cc | 35 ++++++++++++++++++++++ 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/arch/amdgpu/vega/gpu_decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc index 02d45d1c96..0938306c1a 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.cc +++ b/src/arch/amdgpu/vega/gpu_decoder.cc @@ -3118,7 +3118,7 @@ namespace VegaISA &Decoder::decode_OP_VOP1__V_CLREXCP, &Decoder::decode_invalid, &Decoder::decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP1__V_MOV_B64, &Decoder::decode_OP_VOP1__V_CVT_F16_U16, &Decoder::decode_OP_VOP1__V_CVT_F16_I16, &Decoder::decode_OP_VOP1__V_CVT_U16_F16, @@ -11634,6 +11634,12 @@ namespace VegaISA return nullptr; } + GPUStaticInst* + Decoder::decode_OP_VOP1__V_MOV_B64(MachInst iFmt) + { /* Decode handler installed in the table slot that previously held decode_invalid: returns a newly allocated Inst_VOP1__V_MOV_B64 built from the iFmt_VOP1 member of the machine instruction. */ + return new Inst_VOP1__V_MOV_B64(&iFmt->iFmt_VOP1); + } // decode_OP_VOP1__V_MOV_B64 + GPUStaticInst* Decoder::decode_OP_VOP1__V_CVT_F16_U16(MachInst iFmt) { diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index b3c690806d..8094233bd8 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -1289,6 +1289,7 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP1__V_FREXP_MANT_F32(MachInst); GPUStaticInst* decode_OP_VOP1__V_CLREXCP(MachInst); GPUStaticInst* decode_OP_VOP1__V_SCREEN_PARTITION_4SE_B32(MachInst); + GPUStaticInst* decode_OP_VOP1__V_MOV_B64(MachInst); GPUStaticInst* decode_OP_VOP1__V_CVT_F16_U16(MachInst); 
GPUStaticInst* decode_OP_VOP1__V_CVT_F16_I16(MachInst); GPUStaticInst* decode_OP_VOP1__V_CVT_U16_F16(MachInst); diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh index b48d31213b..c18f9b7795 100644 --- a/src/arch/amdgpu/vega/insts/instructions.hh +++ b/src/arch/amdgpu/vega/insts/instructions.hh @@ -9890,6 +9890,38 @@ namespace VegaISA void execute(GPUDynInstPtr) override; }; // Inst_VOP1__V_CLREXCP + class Inst_VOP1__V_MOV_B64 : public Inst_VOP1 + { /* VOP1 v_mov_b64: reports one source operand (index 0) and one destination operand (index 1), each 8 bytes wide; any other operand index is a fatal error. */ + public: + Inst_VOP1__V_MOV_B64(InFmt_VOP1*); + ~Inst_VOP1__V_MOV_B64(); + + int + getNumOperands() override + { + return numDstRegOperands() + numSrcRegOperands(); + } // getNumOperands + + int numDstRegOperands() override { return 1; } + int numSrcRegOperands() override { return 1; } + + int + getOperandSize(int opIdx) override + { + switch (opIdx) { + case 0: //src + return 8; + case 1: //vdst + return 8; + default: + fatal("op idx %i out of bounds\n", opIdx); + return -1; + } + } // getOperandSize + + void execute(GPUDynInstPtr) override; + }; // Inst_VOP1__V_MOV_B64 + class Inst_VOP1__V_CVT_F16_U16 : public Inst_VOP1 { public: diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc index fc41c0ae78..1bd49653ab 100644 --- a/src/arch/amdgpu/vega/insts/vop1.cc +++ b/src/arch/amdgpu/vega/insts/vop1.cc @@ -1874,6 +1874,41 @@ namespace VegaISA { panicUnimplemented(); } // execute + // --- Inst_VOP1__V_MOV_B64 class methods --- + + Inst_VOP1__V_MOV_B64::Inst_VOP1__V_MOV_B64(InFmt_VOP1 *iFmt) + : Inst_VOP1(iFmt, "v_mov_b64") + { + setFlag(ALU); + } // Inst_VOP1__V_MOV_B64 + + Inst_VOP1__V_MOV_B64::~Inst_VOP1__V_MOV_B64() + { + } // ~Inst_VOP1__V_MOV_B64 + + // --- description from .arch file --- + // D.u64 = S0.u64. + // Input and output modifiers not supported; this is an untyped operation. 
+ void + Inst_VOP1__V_MOV_B64::execute(GPUDynInstPtr gpuDynInst) + { /* Copies SRC0 to VDST lane by lane using 64-bit (U64) operands; panics if the instruction is a DPP or SDWA form, which are not implemented here. */ + Wavefront *wf = gpuDynInst->wavefront(); + ConstVecOperandU64 src(gpuDynInst, instData.SRC0); + VecOperandU64 vdst(gpuDynInst, instData.VDST); + + src.readSrc(); /* note: the source is read before the DPP/SDWA checks below run */ + + panic_if(isDPPInst(), "DPP unimplemented for v_mov_b64"); + panic_if(isSDWAInst(), "SDWA unimplemented for v_mov_b64"); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + if (wf->execMask(lane)) { /* only lanes enabled in the wavefront's exec mask are assigned */ + vdst[lane] = src[lane]; + } + } + + vdst.write(); + } // execute // --- Inst_VOP1__V_CVT_F16_U16 class methods --- Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)