From 2bb62a05e13d5e5ad3381ccee84323792ef74bb6 Mon Sep 17 00:00:00 2001
From: Matthew Poremba
Date: Tue, 7 May 2024 17:21:13 -0700
Subject: [PATCH] arch-vega: Implement v_cvt_pk_fp8_f32

Convert two F32 sources to FP8 and pack them into the 16-bit half of
the destination selected by OPSEL[3], with src0 in the low byte and
src1 in the high byte. The other half of the destination is preserved.

This instruction serves as a test for the MXFP8 type.

Change-Id: I2ce30bf7f3a3ecc850a445aebdf971c37c39a79e
---
 src/arch/amdgpu/vega/gpu_decoder.cc        |  8 ++-
 src/arch/amdgpu/vega/gpu_decoder.hh        |  1 +
 src/arch/amdgpu/vega/insts/instructions.hh | 37 ++++++++++++
 src/arch/amdgpu/vega/insts/vop3.cc         | 68 ++++++++++++++++++++++
 4 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/gpu_decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc
index 97a22b6e37..45ad5c5af8 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.cc
+++ b/src/arch/amdgpu/vega/gpu_decoder.cc
@@ -1245,7 +1245,7 @@ namespace VegaISA
         &Decoder::decode_OPU_VOP3__V_SUB_I16,
         &Decoder::decode_OPU_VOP3__V_PACK_B32_F16,
         &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
+        &Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
@@ -7295,6 +7295,12 @@ namespace VegaISA
         return nullptr;
     }
 
+    GPUStaticInst*
+    Decoder::decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst iFmt)
+    {
+        return new Inst_VOP3__V_CVT_PK_FP8_F32(&iFmt->iFmt_VOP3A);
+    }
+
     GPUStaticInst*
     Decoder::decode_OP_DS__DS_ADD_U32(MachInst iFmt)
     {
diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh
index 9dfd0e7c81..09163d1007 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.hh
+++ b/src/arch/amdgpu/vega/gpu_decoder.hh
@@ -509,6 +509,7 @@ namespace VegaISA
         GPUStaticInst* decode_OPU_VOP3__V_ADD_I16(MachInst);
         GPUStaticInst* decode_OPU_VOP3__V_SUB_I16(MachInst);
         GPUStaticInst* decode_OPU_VOP3__V_PACK_B32_F16(MachInst);
+        GPUStaticInst* decode_OPU_VOP3__V_CVT_PK_FP8_F32(MachInst);
         GPUStaticInst* decode_OP_DS__DS_ADD_U32(MachInst);
         GPUStaticInst* decode_OP_DS__DS_SUB_U32(MachInst);
         GPUStaticInst* decode_OP_DS__DS_RSUB_U32(MachInst);
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index f4e93303cd..5f5a2a404e 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -44145,6 +44145,43 @@ namespace VegaISA
         Inst_VOP3P_MAI__V_MFMA<2, 16, 16, 4, 1, ConstVecOperandF64,
                                VecOperandF64>;
 
+    /**
+     * V_CVT_PK_FP8_F32: converts two F32 sources to FP8 and packs the
+     * two result bytes into one 16-bit half of the 32-bit destination.
+     */
+    class Inst_VOP3__V_CVT_PK_FP8_F32 : public Inst_VOP3A
+    {
+      public:
+        Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A*);
+        ~Inst_VOP3__V_CVT_PK_FP8_F32();
+
+        int
+        getNumOperands() override
+        {
+            return numDstRegOperands() + numSrcRegOperands();
+        } // getNumOperands
+
+        int numDstRegOperands() override { return 1; }
+        int numSrcRegOperands() override { return 2; }
+
+        int
+        getOperandSize(int opIdx) override
+        {
+            switch (opIdx) {
+              case 0: //src_0
+                return 4;
+              case 1: //src_1
+                return 4;
+              case 2: //vdst
+                return 4;
+              default:
+                fatal("op idx %i out of bounds\n", opIdx);
+                return -1;
+            }
+        } // getOperandSize
+
+        void execute(GPUDynInstPtr) override;
+    }; // Inst_VOP3__V_CVT_PK_FP8_F32
 } // namespace VegaISA
 } // namespace gem5
 
diff --git a/src/arch/amdgpu/vega/insts/vop3.cc b/src/arch/amdgpu/vega/insts/vop3.cc
index 18446d2e2b..921cd18c26 100644
--- a/src/arch/amdgpu/vega/insts/vop3.cc
+++ b/src/arch/amdgpu/vega/insts/vop3.cc
@@ -29,6 +29,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "arch/amdgpu/common/dtype/mxfp_types.hh"
 #include "arch/amdgpu/vega/insts/inst_util.hh"
 #include "arch/amdgpu/vega/insts/instructions.hh"
 
@@ -8920,5 +8921,72 @@ namespace VegaISA
     {
         panicUnimplemented();
     } // execute
+    // --- Inst_VOP3__V_CVT_PK_FP8_F32 class methods ---
+
+    Inst_VOP3__V_CVT_PK_FP8_F32::Inst_VOP3__V_CVT_PK_FP8_F32(InFmt_VOP3A *iFmt)
+        : Inst_VOP3A(iFmt, "v_cvt_pk_fp8_f32", false)
+    {
+        setFlag(ALU);
+    } // Inst_VOP3__V_CVT_PK_FP8_F32
+
+    Inst_VOP3__V_CVT_PK_FP8_F32::~Inst_VOP3__V_CVT_PK_FP8_F32()
+    {
+    } // ~Inst_VOP3__V_CVT_PK_FP8_F32
+
+    /**
+     * Convert the two F32 sources to FP8 and pack them into one 16-bit
+     * half of VDST: SRC0 goes in the low byte and SRC1 in the high byte.
+     * OPSEL[3] picks the upper (set) or lower (clear) half; the other
+     * half keeps its previous contents. Only lanes enabled in the exec
+     * mask are updated. SDWA, DPP, CLAMP and OMOD trigger a panic.
+     */
+    void
+    Inst_VOP3__V_CVT_PK_FP8_F32::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+
+        src0.readSrc();
+        src1.readSrc();
+        vdst.read(); // Preserve bits
+
+        panic_if(isSDWAInst(), "SDWA not supported for %s", _opcode);
+        panic_if(isDPPInst(), "DPP not supported for %s", _opcode);
+        panic_if(instData.CLAMP, "CLAMP not supported for %s", _opcode);
+        panic_if(extData.OMOD, "OMOD not supported for %s", _opcode);
+
+        unsigned opsel = instData.OPSEL;
+        unsigned abs = instData.ABS;
+        unsigned neg = extData.NEG;
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) {
+                AMDGPU::mxfloat8 tmp0(src0[lane]), tmp1(src1[lane]);
+
+                // Bit 0 of ABS/NEG modifies src0, bit 1 modifies src1.
+                // ABS is applied before NEG.
+                if ((abs & 1) && (tmp0 < 0)) tmp0 = -tmp0;
+                if ((abs & 2) && (tmp1 < 0)) tmp1 = -tmp1;
+                if (neg & 1) tmp0 = -tmp0;
+                if (neg & 2) tmp1 = -tmp1;
+
+                // The ISA packs the pair as {fp8(S1), fp8(S0)}, so src0
+                // occupies the low byte. The FP8 encoding is read from
+                // bits 31:24 of mxfloat8::data.
+                uint16_t packed_data = (bits(tmp1.data, 31, 24) << 8)
+                                     | bits(tmp0.data, 31, 24);
+
+                if (opsel & 8) {
+                    replaceBits(vdst[lane], 31, 16, packed_data);
+                } else {
+                    replaceBits(vdst[lane], 15, 0, packed_data);
+                }
+            }
+        }
+
+        vdst.write();
+    } // execute
 } // namespace VegaISA
 } // namespace gem5