diff --git a/src/arch/amdgpu/common/dtype/README.md b/src/arch/amdgpu/common/dtype/README.md new file mode 100644 index 0000000000..02f1964fdb --- /dev/null +++ b/src/arch/amdgpu/common/dtype/README.md @@ -0,0 +1,21 @@ +# Microscaling Formats + +This directory defines [microscaling formats](https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf) which are reduced-precision floating-point formats. +The class makes some assumptions to simplify things and is not completely generic. +For example: +- Types must be smaller than 32 bits. +- Type conversions currently assume that either: + - The destination format's exponent and mantissa bit counts are both greater than or equal to those of the source format. + - OR the destination format's exponent and mantissa bit counts are both less than or equal to those of the source format. + - In other words, one type cannot have a larger exponent and a smaller mantissa than the other, or vice versa. +- Basic MX operations are implementation-defined, meaning MX types can be converted to FP32 for arithmetic. + - This means that arithmetic operators need not be defined for MX types. +- A value whose exponent and mantissa are both zero is zero. There is no special case for the sign (i.e., -0 is not special). +- The spec does not differentiate between signaling and quiet NaN, therefore quiet NaN is used. +- New types must template specialize the following standard library methods: + - isinf(T) + - isnan(T) + - isnormal(T) +- New types must template specialize the following std::numeric_limits members / methods: + - has_infinity / infinity() + - has_quiet_NaN / quiet_NaN() diff --git a/src/arch/amdgpu/common/dtype/binary32.hh b/src/arch/amdgpu/common/dtype/binary32.hh new file mode 100644 index 0000000000..441eed57ca --- /dev/null +++ b/src/arch/amdgpu/common/dtype/binary32.hh @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ + +namespace gem5 +{ + +namespace AMDGPU +{ + +// Same as IEEE 754 binary32. Microscaling types are converted to/from +// this format by default, as there do not currently seem to be any MI300 +// instructions operating directly on the types (i.e., they all cast to FP32 +// first and then perform arithmetic operations).
+typedef union binary32_u +{ + enum bitSizes + { + ebits = 8, + mbits = 23, + sbits = 1, + bias = 127, + + inf = 0x7f800000, + nan = 0x7f800100, + max = 0x7f7fffff + }; + + uint32_t storage; + float fp32; + struct + { + unsigned mant : 23; + unsigned exp : 8; + unsigned sign : 1; + }; + + // To help with stdlib functions with T = float. + operator float() const + { + return fp32; + } +} binary32; +static_assert(sizeof(binary32) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + +namespace std +{ + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::binary32 quiet_NaN() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::quiet_NaN(); + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::binary32 infinity() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::infinity(); + return tmp; + } + + static gem5::AMDGPU::binary32 max() + { + gem5::AMDGPU::binary32 tmp; + tmp.fp32 = std::numeric_limits::max(); + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_BINARY32_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp16_e5m10.hh b/src/arch/amdgpu/common/dtype/fp16_e5m10.hh new file mode 100644 index 0000000000..363dcada12 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp16_e5m10.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. 
Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 5, + mbits = 10, + sbits = 1, + zbits = 16, + bias = 15, + + inf = 0x7c000000, + nan = 0x7c100000, + max = 0x7bff0000 + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp16_e5m10_info; +static_assert(sizeof(fp16_e5m10_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp16_e5m10_info a) +{ + return a.exp == 0x1F && a.mant == 0; +} + +constexpr bool isnan(gem5::AMDGPU::fp16_e5m10_info a) +{ + return a.exp == 0x1F && a.mant != 0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp16_e5m10_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + 
public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp16_e5m10_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e5m10_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp16_e5m10_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e5m10_info::inf; + return tmp; + } + + static gem5::AMDGPU::fp16_e5m10_info max() + { + gem5::AMDGPU::fp16_e5m10_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e5m10_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP16_E5M10_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp16_e8m7.hh b/src/arch/amdgpu/common/dtype/fp16_e8m7.hh new file mode 100644 index 0000000000..3c796fca51 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp16_e8m7.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 8, + mbits = 7, + sbits = 1, + zbits = 16, + bias = 127, + + inf = 0x7f800000, + nan = 0x7f810000, + max = 0x7f7f0000 + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp16_e8m7_info; +static_assert(sizeof(fp16_e8m7_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp16_e8m7_info a) +{ + return a.exp == 0xFF && a.mant == 0; +} + +constexpr bool isnan(gem5::AMDGPU::fp16_e8m7_info a) +{ + return a.exp == 0xFF && a.mant != 0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp16_e8m7_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp16_e8m7_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = 
gem5::AMDGPU::fp16_e8m7_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp16_e8m7_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e8m7_info::inf; + return tmp; + } + + static gem5::AMDGPU::fp16_e8m7_info max() + { + gem5::AMDGPU::fp16_e8m7_info tmp; + tmp.storage = gem5::AMDGPU::fp16_e8m7_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP16_E8M7_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp8_e4m3.hh b/src/arch/amdgpu/common/dtype/fp8_e4m3.hh new file mode 100644 index 0000000000..46d2685c00 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp8_e4m3.hh @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 4, + mbits = 3, + sbits = 1, + zbits = 24, + bias = 7, + + inf = (0x7f << zbits), + nan = (0xff << zbits), + max = (0x7f << zbits) + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp8_e4m3_info; +static_assert(sizeof(fp8_e4m3_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +// Inf not defined +constexpr bool isinf(gem5::AMDGPU::fp8_e4m3_info a) { return false; } + +constexpr bool isnan(gem5::AMDGPU::fp8_e4m3_info a) +{ + return a.exp == 0xF && a.mant == 0x7; +} + +constexpr bool isnormal(gem5::AMDGPU::fp8_e4m3_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp8_e4m3_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::nan; + return tmp; + } + + static constexpr bool has_infinity = false; + static gem5::AMDGPU::fp8_e4m3_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::inf; + return tmp; + } 
+ + static gem5::AMDGPU::fp8_e4m3_info max() + { + gem5::AMDGPU::fp8_e4m3_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e4m3_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP8_E4M3_HH__ diff --git a/src/arch/amdgpu/common/dtype/fp8_e5m2.hh b/src/arch/amdgpu/common/dtype/fp8_e5m2.hh new file mode 100644 index 0000000000..9e1f5812d5 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/fp8_e5m2.hh @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ + +#include + +namespace gem5 +{ + +namespace AMDGPU +{ + +typedef union +{ + enum bitSizes + { + ebits = 5, + mbits = 2, + sbits = 1, + zbits = 24, + bias = 15, + + inf = (0x7c << zbits), + nan = (0xff << zbits), + max = (0x7f << zbits) + }; + + uint32_t storage; + struct + { + unsigned zero : zbits; + unsigned mant : mbits; + unsigned exp : ebits; + unsigned sign : sbits; + }; +} fp8_e5m2_info; +static_assert(sizeof(fp8_e5m2_info) == 4); + +} // namespace AMDGPU + +} // namespace gem5 + + +// std library cmath definitions +namespace std +{ + +constexpr bool isinf(gem5::AMDGPU::fp8_e5m2_info a) +{ + return a.exp == 0x1F && a.mant == 0x0; +} + +constexpr bool isnan(gem5::AMDGPU::fp8_e5m2_info a) +{ + return a.exp == 0x1F && a.mant != 0x0; +} + +constexpr bool isnormal(gem5::AMDGPU::fp8_e5m2_info a) +{ + return !(a.exp == 0 && a.mant != 0); +} + +template<> +class numeric_limits +{ + public: + static constexpr bool has_quiet_NaN = true; + static gem5::AMDGPU::fp8_e5m2_info quiet_NaN() + { + assert(has_quiet_NaN); + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::nan; + return tmp; + } + + static constexpr bool has_infinity = true; + static gem5::AMDGPU::fp8_e5m2_info infinity() + { + assert(has_infinity); + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::inf; + return 
tmp; + } + + static gem5::AMDGPU::fp8_e5m2_info max() + { + gem5::AMDGPU::fp8_e5m2_info tmp; + tmp.storage = gem5::AMDGPU::fp8_e5m2_info::max; + return tmp; + } +}; + +} // namespace std + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_FP8_E5M2_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp.hh b/src/arch/amdgpu/common/dtype/mxfp.hh new file mode 100644 index 0000000000..d7edb32dbf --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp.hh @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ + +#include +#include +#include + +#include "arch/amdgpu/common/dtype/mxfp_convert.hh" + +namespace gem5 +{ + +namespace AMDGPU +{ + +// Base class for all microscaling types. The sizes of everything are +// determined by the enum fields in the FMT struct. All of these share the +// same operator overloads which convert to float before arithmetic and +// convert back if assigned to a microscaling type. +template +class mxfp +{ + public: + mxfp() = default; + mxfp(float f) : mode(roundTiesToEven) + { + data = float_to_mxfp(f); + } + + // Set raw bits, used by gem5 to set a raw value read from VGPRs. + mxfp(const uint32_t& raw) + { + // The info unions end up being "left" aligned. For example, in FP4 + // only the bits 31:28 are used. Shift the input by the storage size + // of 32 by the type size (sign + exponent + mantissa bits). 
+ data = raw; + data <<= (32 - int(FMT::sbits) - int(FMT::ebits) - int(FMT::mbits)); + } + + mxfp(const mxfp& f) + { + FMT conv_out; + conv_out = convertMXFP(f.getFmt()); + data = conv_out.storage; + } + + mxfp& + operator=(const float& f) + { + data = float_to_mxfp(f); + return *this; + } + + mxfp& + operator=(const mxfp& f) + { + FMT conv_out; + conv_out = convertMXFP(f.getFmt()); + data = conv_out.storage; + return *this; + } + + operator float() const + { + binary32 out; + FMT in; + in.storage = data; + out = convertMXFP(in, mode); + + return out.fp32; + } + + constexpr static int + size() + { + return int(FMT::mbits) + int(FMT::ebits) + int(FMT::sbits); + } + + // Intentionally use storage > size() so that a storage type is not needed + // as a template parameter. + uint32_t data = 0; + + FMT + getFmt() const + { + FMT out; + out.storage = data; + return out; + } + + void + setFmt(FMT in) + { + data = in.storage; + } + + void + scale(const float& f) + { + binary32 bfp; + bfp.fp32 = f; + int scale_val = bfp.exp - bfp.bias; + + // Scale value of 0xFF is NaN. Scaling by NaN returns NaN. + // In this implementation, types without NaN define it as zero. 
+ if (scale_val == 0xFF) { + data = FMT::nan; + return; + } + + FMT in = getFmt(); + int exp = in.exp; + + if (exp + scale_val > max_exp()) { + in.exp = max_exp(); + } else if (exp + scale_val < min_exp()) { + in.exp = min_exp(); + } else { + in.exp = exp + scale_val; + } + + data = in.storage; + } + + private: + mxfpRoundingMode mode = roundTiesToEven; + + uint32_t + float_to_mxfp(float f) + { + if (std::isinf(f)) { + assert(std::numeric_limits::has_infinity); + return FMT::inf; + } + + if (std::isnan(f)) { + assert(std::numeric_limits::has_quiet_NaN); + return FMT::nan; + } + + return float_to_mxfp_nocheck(f); + } + + uint32_t + float_to_mxfp_nocheck(float f) + { + binary32 in; + in.fp32 = f; + + FMT out; + out.storage = 0; + + out = convertMXFP(in, mode); + + return out.storage; + } +}; + +// Unary operators +template +inline T operator+(T a) +{ + return a; +} + +template +inline T operator-(T a) +{ + // Flip sign bit + a.data ^= 0x80000000; + return a; +} + +template +inline T operator++(T a) +{ + a = a + T(1.0f); + return a; +} + +template +inline T operator--(T a) +{ + a = a - T(1.0f); + return a; +} + +template +inline T operator++(T a, int) +{ + T original = a; + ++a; + return original; +} + +template +inline T operator--(T a, int) +{ + T original = a; + --a; + return original; +} + +// Math operators +template +inline T operator+(T a, T b) +{ + return T(float(a) + float(b)); +} + +template +inline T operator-(T a, T b) +{ + return T(float(a) - float(b)); +} + +template +inline T operator*(T a, T b) +{ + return T(float(a) * float(b)); +} + +template +inline T operator/(T a, T b) +{ + return T(float(a) / float(b)); +} + +template +inline T operator+=(T &a, T b) +{ + a = a + b; + return a; +} + +template +inline T operator-=(T &a, T b) +{ + a = a - b; + return a; +} + +template +inline T operator*=(T &a, T b) +{ + a = a * b; + return a; +} + +template +inline T operator/=(T &a, T b) +{ + a = a / b; + return a; +} + +// Comparison operators +template +inline 
bool operator<(T a, T b) +{ + return float(a) < float(b); +} + +template +inline bool operator>(T a, T b) +{ + return float(a) > float(b); +} + +template +inline bool operator<=(T a, T b) +{ + return float(a) <= float(b); +} + +template +inline bool operator>=(T a, T b) +{ + return float(a) >= float(b); +} + +template +inline bool operator==(T a, T b) +{ + return float(a) == float(b); +} + +template +inline bool operator!=(T a, T b) +{ + return float(a) != float(b); +} + +} // namespace AMDGPU + +} // namespace gem5 + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp_convert.hh b/src/arch/amdgpu/common/dtype/mxfp_convert.hh new file mode 100644 index 0000000000..641d5f5732 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_convert.hh @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ + +#include + +#include "arch/amdgpu/common/dtype/mxfp_type_info.hh" +#include "base/bitfield.hh" + +namespace gem5 +{ + +namespace AMDGPU +{ + +// The various rounding modes for microscaling formats. roundTiesToEven must +// be supported. Other rounding modes may be supported. +enum mxfpRoundingMode +{ + roundTiesToEven, + roundStochastic +}; + +// Conversion functions - For instructions that convert from one microscaling +// format to another. We only need the conversion functions as there do not +// appear to be any instructions yet which operate directly on the MX formats. +// +// in - An MXFP info struct type +// mode - rounding mode +// seed - input value for stochastic rounding function +template +dFMT convertMXFP(sFMT in, mxfpRoundingMode mode = roundTiesToEven, + uint32_t seed = 0) +{ + // We assume that the source exponent and mantissa bit counts are *both* + // >= or *both* <= those of the target type. Checkable at compile time. + // + // This is not necessarily a limitation; other cases are just not + // implemented. Figuring this out would be interesting for converting + // FP8 <-> BF8, for example. So far all GPU conversion instructions + // convert explicitly from a larger type to a smaller one or vice versa.
+ static_assert(((int(sFMT::mbits) >= int(dFMT::mbits)) && + (int(sFMT::ebits) >= int(dFMT::ebits))) + || ((int(sFMT::mbits) <= int(dFMT::mbits)) && + (int(sFMT::ebits) <= int(dFMT::ebits)))); + + dFMT out; + out.storage = 0; + + if (int(sFMT::mbits) >= int(dFMT::mbits) && + int(sFMT::ebits) >= int(dFMT::ebits)) { + // Input format is larger, truncate and round mantissa. MX formats + // are subnormal if exp == 0. Zero out exp in that case. + + if (std::isnan(in)) { + // For types with no NaN return max value. + if (std::numeric_limits::has_quiet_NaN) { + out = std::numeric_limits::quiet_NaN(); + } else { + out = std::numeric_limits::max(); + } + } else if (std::isinf(in)) { + // For types with no Inf return max value. + if (std::numeric_limits::has_infinity) { + out = std::numeric_limits::infinity(); + } else { + out = std::numeric_limits::max(); + } + } else if (in.mant == 0 && in.exp == 0) { + // All MX formats FP32, and FP64 encode 0 as all zeros. Keep sign. + out.mant = 0; + out.exp = 0; + out.sign = in.sign; + } else { + // Extra bits are needed for the mantissa conversion. + uint32_t mant = in.mant & mask(sFMT::mbits); + int32_t exp = in.exp - sFMT::bias + dFMT::bias; + out.sign = in.sign; + + // Input is not subnormal, add the implicit 1 bit. + if (in.exp) { + mant |= (1 << sFMT::mbits); + } + + mant >>= (sFMT::mbits - dFMT::mbits); + + // Output became subnormal + if (exp < 1) { + int shift = 1 - exp; + mant >>= shift; + out.exp = 0; + } else { + out.exp = exp; + } + + mant &= mask(dFMT::mbits); + out.mant = mant; + + // roundTiesToEven is the only required rounding mode for MXFP + // types. Here we take the original mantissa and check the final + // bit which is shifted out when converting the mantissa. If that + // value is one, then we should round up to the next representable + // number. 
If the value is one and all other discarded mantissa + // bits are zero, round towards the number which has an even (0) + // bit value in the least significant mantissa bit. + // + // For denormals, the process is similar however we check the nth + // bit of the converted mantissa, where n is the absolute value of + // the converted exponent. If the value of |exp| is larger than + // the max exponent, round to zero. If it is exactly equal, always + // round up. + // + // If the number of destination and source format mantissa bits are + // the same, the mantissa is unchanged. + if (int(sFMT::mbits) > int(dFMT::mbits) + && mode == roundTiesToEven) { + bool round_up = false; + + int check_shift = sFMT::mbits - dFMT::mbits - 1; + uint32_t check_mant = in.mant & mask(sFMT::mbits); + + check_mant >>= check_shift; + + // out.exp == 0 means subnormal + if (out.exp == 0) { + check_mant = in.mant >> (sFMT::mbits - dFMT::mbits); + + uint32_t max_exp = mask(dFMT::ebits); + if (-exp > max_exp) { + // if exp < -(1 << dFMT::ebits), result should be 0 + round_up = false; + } else if (-exp == max_exp) { + // if exp == -(1 << dFMT::ebits), round up + round_up = true; + } else { + // Use the |exp|'th bit to determine rounding + int check_bit = 1 << -exp; + round_up = (check_mant & check_bit); + } + } else { + round_up = (check_mant & 0x1); + } + + // For roundTiesToEven, if we are exactly between two + // representable numbers, pick the one with an even least + // significant mantissa bit. We are exactly between when + // all of the discarded mantissa bits are 0 (i.e., !sticky). 
+ int sticky = in.mant & mask(sFMT::mbits - dFMT::mbits); + if (round_up && !sticky) { + if (!(out.mant & 1)) { + round_up = false; + } + } + + if (round_up) { + if (out.mant == mask(dFMT::mbits)) { + // mantissa at max value, increment exponent if not inf + if (out.exp != mask(dFMT::ebits)) { + out.exp++; + } + out.mant = 0; + } else { + out.mant++; + } + } + } else if (int(sFMT::mbits) > int(dFMT::mbits) + && mode == roundStochastic) { + // Use the discarded mantissa divided by the max mantissa of + // the source format to determine the probability of rounding + // up. An alternate implementation of this would be to get a + // random number and add that to the input mantissa. Then + // follow the normal rounding path above. + uint32_t discarded = in.mant & mask(sFMT::mbits - dFMT::mbits); + uint32_t max_mant = mask(sFMT::mbits); + + float round_prob = float(discarded) / float(max_mant); + + // Use a stochastic rounding function with the seed value to + // determine compare probability. This is implemented as a + // "Galois LFSR." + auto srFunc = [](uint32_t in) { + uint32_t bit = (in ^ (in >> 1) ^ (in >> 3) ^ (in >> 12)); + return (in >> 1) | (bit << 15); + }; + + // Assume stochastic rounding returns up to max uint32_t. + // This will return an FP value between 0.0f and 1.0f. + float draw_prob = float(srFunc(seed)) + / float(std::numeric_limits::max()); + + // Round up if the number we drew is less than the rounding + // probability. E.g., if round_prob is 90% (0.9) we choose + // values 0.0f - 0.90f to round up. + if (round_prob >= draw_prob) { + if (out.mant == mask(dFMT::mbits)) { + // mantissa at max value, increment exponent if not inf + if (out.exp != mask(dFMT::ebits)) { + out.exp++; + } + out.mant = 0; + } else { + out.mant++; + } + } + } + } + } else if (int(sFMT::mbits) <= int(dFMT::mbits) && + int(sFMT::ebits) <= int(dFMT::ebits)) { + // Input format is smaller. Extend mantissa / exponent and pad with 0. 
+ // Should be the same for all non-stochastic rounding modes. + + if (std::isnan(in)) { + // For types with no NaN return max value. + if (std::numeric_limits::has_quiet_NaN) { + out = std::numeric_limits::quiet_NaN(); + } else { + out = std::numeric_limits::max(); + } + } else if (std::isinf(in)) { + // For types with no Inf return max value. + if (std::numeric_limits::has_infinity) { + out = std::numeric_limits::infinity(); + } else { + out = std::numeric_limits::max(); + } + } else if (in.mant == 0 && in.exp == 0) { + // All MX formats FP32, and FP64 encode 0 as all zeros. Keep sign. + out.mant = 0; + out.exp = 0; + out.sign = in.sign; + } else { + out.mant = in.mant << (dFMT::mbits - sFMT::mbits); + out.exp = in.exp + dFMT::bias - sFMT::bias; + out.sign = in.sign; + + // Normalize input denormals + if (!in.exp && int(sFMT::ebits) != int(dFMT::ebits)) { + uint32_t m = out.mant; + if (m != 0) { + out.exp++; + while (!(m >> dFMT::mbits)) { + m <<= 1; + out.exp--; + } + out.mant = m & mask(dFMT::mbits); + } + } else if (!in.exp) { + // Exponent is the same, but output is not denorm, so add + // implicit 1. This is specific mainly to bf16 -> f32. + uint32_t m = out.mant; + m <<= 1; + out.mant = m & mask(dFMT::mbits); + } + } + } else { + assert(false); + } + + return out; +} + +template +int min_exp() +{ + return 1; +} + +template +int max_exp() +{ + return (1 << FMT::ebits) - 1; +} + + +} // namespace AMDGPU + +} // namespace gem5 + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_CONVERT_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp_type_info.hh b/src/arch/amdgpu/common/dtype/mxfp_type_info.hh new file mode 100644 index 0000000000..fe433523d6 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_type_info.hh @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ + +#include "arch/amdgpu/common/dtype/binary32.hh" +#include "arch/amdgpu/common/dtype/fp16_e5m10.hh" +#include "arch/amdgpu/common/dtype/fp16_e8m7.hh" +#include "arch/amdgpu/common/dtype/fp8_e4m3.hh" +#include "arch/amdgpu/common/dtype/fp8_e5m2.hh" + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPE_INFO_HH__ diff --git a/src/arch/amdgpu/common/dtype/mxfp_types.hh b/src/arch/amdgpu/common/dtype/mxfp_types.hh new file mode 100644 index 0000000000..29155901d4 --- /dev/null +++ b/src/arch/amdgpu/common/dtype/mxfp_types.hh @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__ +#define __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__ + +#include "arch/amdgpu/common/dtype/mxfp.hh" + +namespace gem5 +{ +namespace AMDGPU +{ + +using mxbfloat8 = mxfp; +using mxfloat8 = mxfp; + +using mxbfloat16 = mxfp; +using mxfloat16 = mxfp; + +using mxfloat32 = mxfp; + +} +} + +#endif // __ARCH_AMDGPU_COMMON_DTYPE_MXFP_TYPES_HH__