From 91955ae879eae3bf585a333bb8b1e4a71a0630ff Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Thu, 9 May 2024 11:58:15 -0700 Subject: [PATCH] arch-vega: Decodings for all MFMA/SMFMACs up to MI300 This adds the decodings for all of the matrix fused multiply add (MFMA) and sparse matrix fused multiply accumulate (SMFMAC) instructions up to and including MI300. This does not yet provide the implementation for these instructions; however, it is easier and less tedious to add them in bulk rather than one at a time. Change-Id: I5acd23ca8a26bdec843bead545d1f8820ad95b41 --- src/arch/amdgpu/vega/gpu_decoder.cc | 405 ++++++++++++++++++++++++---- src/arch/amdgpu/vega/gpu_decoder.hh | 49 +++- 2 files changed, 391 insertions(+), 63 deletions(-) diff --git a/src/arch/amdgpu/vega/gpu_decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc index 45ad5c5af8..02d45d1c96 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.cc +++ b/src/arch/amdgpu/vega/gpu_decoder.cc @@ -3644,70 +3644,70 @@ namespace VegaISA &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32, &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X1_16B_F32, &Decoder::decode_invalid, - &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2F32, - &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16, + &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8, + 
&Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16I8, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8, + &Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8, &Decoder::decode_OP_VOP3P__V_ACCVGPR_READ, &Decoder::decode_OP_VOP3P__V_ACCVGPR_WRITE, &Decoder::decode_invalid, &Decoder::decode_invalid, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8, &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8, &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - 
&Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4F64, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, - &Decoder::decode_invalid, + &Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64, + &Decoder::decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8, + &Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8, }; GPUStaticInst* @@ -13172,9 +13172,16 @@ namespace VegaISA } GPUStaticInst* - Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2F32(MachInst iFmt) + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32(MachInst iFmt) { - return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X2F32( + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X1_2B_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + 
Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X1_4B_F32( &iFmt->iFmt_VOP3P_MAI); } @@ -13186,16 +13193,303 @@ namespace VegaISA } GPUStaticInst* - Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4F32(MachInst iFmt) + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32(MachInst iFmt) { - return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X4F32( + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X2_F32( &iFmt->iFmt_VOP3P_MAI); } GPUStaticInst* - Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16I8(MachInst iFmt) + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32(MachInst iFmt) { - return new Inst_VOP3P_MAI__V_MFMA_I32_16X16X16I8( + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_F32( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X4_2B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X4_4B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_4X4X4_16B_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F32_16X16X16_F16( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + 
GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_I32_16X16X16_I8( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16(MachInst iFmt) + { + fatal("Trying to 
decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F64_4X4X4_4B_F64( + &iFmt->iFmt_VOP3P_MAI); + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + 
Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8(MachInst iFmt) + { + fatal("Trying to decode instruction without a class\n"); + return nullptr; + } + + GPUStaticInst* + Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64(MachInst iFmt) + { + return new Inst_VOP3P_MAI__V_MFMA_F64_16X16X4_F64( &iFmt->iFmt_VOP3P_MAI); } @@ -13211,13 +13505,6 @@ namespace VegaISA return new 
Inst_VOP3P__V_ACCVGPR_WRITE(&iFmt->iFmt_VOP3P); } - GPUStaticInst* - Decoder::decode_OP_VOP3P__V_MFMA_F64_16X16X4F64(MachInst iFmt) - { - return new Inst_VOP3P_MAI__V_MFMA_F64_16X16X4F64( - &iFmt->iFmt_VOP3P_MAI); - } - GPUStaticInst* Decoder::decode_invalid(MachInst iFmt) { diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh index 09163d1007..b3c690806d 100644 --- a/src/arch/amdgpu/vega/gpu_decoder.hh +++ b/src/arch/amdgpu/vega/gpu_decoder.hh @@ -1606,13 +1606,54 @@ namespace VegaISA GPUStaticInst* decode_OP_VOP3P__V_DOT4_U32_U8(MachInst); GPUStaticInst* decode_OP_VOP3P__V_DOT8_I32_I4(MachInst); GPUStaticInst* decode_OP_VOP3P__V_DOT8_U32_U4(MachInst); - GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X2F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X1_2B_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X1_4B_F32(MachInst); GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X1_16B_F32(MachInst); - GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4F32(MachInst); - GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X16I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X2_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_F32(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X8_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X16_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_32X32X4_2B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X4_4B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_4X4X4_16B_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_32X32X8_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X16_I8(MachInst); + GPUStaticInst* 
decode_OP_VOP3P__V_MFMA_I32_32X32X16_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_I32_16X16X32_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X4_2B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X4_4B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_4X4X4_16B_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X16_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_F16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X32_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X16_BF16(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_I32_16X16X64_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_I32_32X32X32_I8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F64_16X16X4_F64(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F64_4X4X4_4B_F64(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_16X16X32_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_MFMA_F32_32X32X16_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_16X16X64_FP8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_BF8(MachInst); + 
GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_BF8_FP8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_BF8(MachInst); + GPUStaticInst* decode_OP_VOP3P__V_SMFMAC_F32_32X32X32_FP8_FP8(MachInst); GPUStaticInst* decode_OP_VOP3P__V_ACCVGPR_READ(MachInst); GPUStaticInst* decode_OP_VOP3P__V_ACCVGPR_WRITE(MachInst); - GPUStaticInst* decode_OP_VOP3P__V_MFMA_F64_16X16X4F64(MachInst); GPUStaticInst* subDecode_OPU_VOP3(MachInst); GPUStaticInst* subDecode_OP_DS(MachInst); GPUStaticInst* subDecode_OP_FLAT(MachInst);