arch-vega: Fix V_FMAC_F32 data type

The source and destination operands of V_FMAC_F32 are declared with the U32 operand types but should use the F32 types. This causes an implicit cast, leading to incorrect results. This fixes nn.Dropout in PyTorch.

Change-Id: I546aa917fde1fd6bc832d9d0fa9ffe66505e87dd
2024-03-18 19:02:10 -05:00
parent ba2f5615ba
commit 3f8d0e1ef8
1 changed file with 3 additions and 3 deletions
--- a/src/arch/amdgpu/vega/insts/vop2.cc
+++ b/src/arch/amdgpu/vega/insts/vop2.cc
@@ -2167,9 +2167,9 @@ namespace VegaISA
    Inst_VOP2__V_FMAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
-        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
-        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
-        VecOperandU32 vdst(gpuDynInst, instData.VDST);
+        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
+        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();