arch-vega: Implement v_mfma_f32_32x32x8_bf16
Implement a bfloat16 MFMA instruction. This was tested with PyTorch using dtype=torch.bfloat16.

Change-Id: I35b4e60e71477553a93020ef0ee31d1bcae9ca5d
This commit is contained in:
@@ -13320,8 +13320,8 @@ namespace VegaISA
|
||||
GPUStaticInst*
|
||||
Decoder::decode_OP_VOP3P__V_MFMA_F32_32X32X8_BF16(MachInst iFmt)
|
||||
{
|
||||
fatal("Trying to decode instruction without a class\n");
|
||||
return nullptr;
|
||||
return new Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_BF16(
|
||||
&iFmt->iFmt_VOP3P_MAI);
|
||||
}
|
||||
|
||||
GPUStaticInst*
|
||||
|
||||
@@ -44394,6 +44394,12 @@ namespace VegaISA
|
||||
Inst_VOP3P_MAI__V_MFMA_MXFP<4, 4, 4, 16, AMDGPU::mxfloat16,
|
||||
&MNEM__V_MFMA_F32_4X4X4_16B_F16>;
|
||||
|
||||
static const char *MNEM__V_MFMA_F32_32X32X8_BF16 =
|
||||
"v_mfma_f32_32x32x8_bf16";
|
||||
using Inst_VOP3P_MAI__V_MFMA_F32_32X32X8_BF16 =
|
||||
Inst_VOP3P_MAI__V_MFMA_MXFP<32, 32, 8, 1, AMDGPU::mxbfloat16,
|
||||
&MNEM__V_MFMA_F32_32X32X8_BF16>;
|
||||
|
||||
|
||||
template <const int M, const int N, const int K,
|
||||
const int B, const char **MNEMONIC>
|
||||
|
||||
Reference in New Issue
Block a user