From c1251f51c12d48b084245455eac0088d77c6b232 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Sat, 10 Aug 2024 09:58:03 -0700
Subject: [PATCH] arch-vega: Introduce two scaling methods for microscaling
 types

Currently there is only a scale() method, which multiplies a microscaling
type by an int8 value. To match hardware, this multiplication should only
be applied when upcasting to a larger type, after the conversion. When
downcasting to a smaller type, the scaling method should instead divide by
the int8 value before the conversion.

This commit provides both scaling methods: scale() is renamed to
scaleMul() and a new scaleDiv() is added.

Change-Id: Ibafa8caa389cde4df609e536cd53bd2289959420
---
 src/arch/amdgpu/common/dtype/mxfp.hh | 63 +++++++++++++++++++---------
 1 file changed, 44 insertions(+), 19 deletions(-)
diff --git a/src/arch/amdgpu/common/dtype/mxfp.hh b/src/arch/amdgpu/common/dtype/mxfp.hh
index d7edb32dbf..8a70b84012 100644
--- a/src/arch/amdgpu/common/dtype/mxfp.hh
+++ b/src/arch/amdgpu/common/dtype/mxfp.hh
@@ -125,20 +125,23 @@ class mxfp
         data = in.storage;
     }
 
+    // Used for upcasting
     void
-    scale(const float& f)
+    scaleMul(const float& f)
     {
         binary32 bfp;
         bfp.fp32 = f;
-        int scale_val = bfp.exp - bfp.bias;
+        int scale_val = bfp.exp;
 
         // Scale value of 0xFF is NaN. Scaling by NaN returns NaN.
-        // In this implementation, types without NaN define it as zero.
+        // In this implementation, types without NaN define it as max().
         if (scale_val == 0xFF) {
             data = FMT::nan;
             return;
         }
 
+        scale_val -= bfp.bias;
+
         FMT in = getFmt();
         int exp = in.exp;
 
@@ -153,27 +156,49 @@ class mxfp
         data = in.storage;
     }
 
+    // Used for downcasting: subtracts f's unbiased exponent (divide by scale)
+    void
+    scaleDiv(const float& f)
+    {
+        binary32 bfp;
+        bfp.fp32 = f;
+        int scale_val = bfp.exp;  // raw exponent field; bias removed below
+
+        // Scale value of 0xFF is NaN. Scaling by NaN returns NaN.
+        // In this implementation, types without NaN define it as max().
+        if (scale_val == 0xFF) {
+            data = FMT::nan;
+            return;
+        }
+
+        scale_val -= bfp.bias;  // done after the 0xFF check on the raw field
+
+        FMT in = getFmt();
+        int exp = in.exp;
+
+        if (exp - scale_val > max_exp<FMT>()) {
+            in.exp = max_exp<FMT>();
+        } else if (exp - scale_val < min_exp<FMT>()) {
+            in.exp = min_exp<FMT>();
+        } else {
+            in.exp = exp - scale_val;
+
+            // Output becomes denorm: fold the implicit one into the mantissa
+            if (in.exp == 0) {
+                uint32_t m = in.mant | 1 << FMT::mbits;
+                m >>= 1;
+                in.mant = m & mask(FMT::mbits);
+            }
+        }
+
+        data = in.storage;
+    }
+
   private:
     mxfpRoundingMode mode = roundTiesToEven;
 
     uint32_t
     float_to_mxfp(float f)
-    {
-        if (std::isinf(f)) {
-            assert(std::numeric_limits<FMT>::has_infinity);
-            return FMT::inf;
-        }
-
-        if (std::isnan(f)) {
-            assert(std::numeric_limits<FMT>::has_quiet_NaN);
-            return FMT::nan;
-        }
-
-        return float_to_mxfp_nocheck(f);
-    }
-
-    uint32_t
-    float_to_mxfp_nocheck(float f)
     {
         binary32 in;
         in.fp32 = f;