arch-arm: Add FP16 support and other primitives to fplib
This changeset:
- extends fplib to support emulation of half-precision floating-point (FP16) operations;
- extends fplib to support additional primitives introduced by the Arm Scalable Vector Extension (SVE) (fplibExpA, fplibScale, fplibTrigMulAdd, fplibTrigSMul, fplibTrigSSel);
- adds the FZ16 bit to FPSCR;
- cleans up fplib code by replacing constants with preprocessor macros and by adding inline functions to recognise NaNs and infinities.

Change-Id: If8fdb2a5824b478c8310bbc126ec60cc1105f135
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/13044
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
This commit is contained in:
committed by
Giacomo Gabrielli
parent
b992ecbc5b
commit
3afece061e
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2013 ARM Limited
|
||||
* Copyright (c) 2012-2013, 2017-2018 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -89,12 +89,18 @@ bool fplibCompareGE(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point compare greater than. */
|
||||
template <class T>
|
||||
bool fplibCompareGT(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point compare unordered. */
|
||||
template <class T>
|
||||
bool fplibCompareUN(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point convert precision. */
|
||||
template <class T1, class T2>
|
||||
T2 fplibConvert(T1 op, FPRounding rounding, FPSCR &fpscr);
|
||||
/** Floating-point division. */
|
||||
template <class T>
|
||||
T fplibDiv(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point exponential accelerator. */
|
||||
template <class T>
|
||||
T fplibExpA(T op);
|
||||
/** Floating-point maximum. */
|
||||
template <class T>
|
||||
T fplibMax(T op1, T op2, FPSCR &fpscr);
|
||||
@@ -137,12 +143,24 @@ T fplibRecpX(T op, FPSCR &fpscr);
|
||||
/** Floating-point round to integral value (result stays in floating-point format). */
|
||||
template <class T>
|
||||
T fplibRoundInt(T op, FPRounding rounding, bool exact, FPSCR &fpscr);
|
||||
/** Floating-point adjust exponent. */
|
||||
template <class T>
|
||||
T fplibScale(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point square root. */
|
||||
template <class T>
|
||||
T fplibSqrt(T op, FPSCR &fpscr);
|
||||
/** Floating-point subtract. */
|
||||
template <class T>
|
||||
T fplibSub(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point trigonometric multiply-add coefficient. */
|
||||
template <class T>
|
||||
T fplibTrigMulAdd(uint8_t coeff_index, T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point trigonometric starting value. */
|
||||
template <class T>
|
||||
T fplibTrigSMul(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point trigonometric select coefficient. */
|
||||
template <class T>
|
||||
T fplibTrigSSel(T op1, T op2, FPSCR &fpscr);
|
||||
/** Floating-point convert to fixed-point. */
|
||||
template <class T1, class T2>
|
||||
T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr);
|
||||
@@ -150,33 +168,57 @@ T2 fplibFPToFixed(T1 op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr);
|
||||
template <class T>
|
||||
T fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
/** Floating-point value for +/- infinity. */
|
||||
template <class T>
|
||||
T fplibInfinity(int sgn);
|
||||
/** Floating-point value for default NaN. */
|
||||
template <class T>
|
||||
T fplibDefaultNaN();
|
||||
|
||||
/* Function specializations... */
|
||||
template <>
|
||||
uint16_t fplibAbs(uint16_t op);
|
||||
template <>
|
||||
uint32_t fplibAbs(uint32_t op);
|
||||
template <>
|
||||
uint64_t fplibAbs(uint64_t op);
|
||||
template <>
|
||||
uint16_t fplibAdd(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
int fplibCompare(uint16_t op1, uint16_t op2, bool signal_nans, FPSCR &fpscr);
|
||||
template <>
|
||||
int fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr);
|
||||
template <>
|
||||
int fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareEQ(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareEQ(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareEQ(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGE(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGE(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGE(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGT(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGT(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareGT(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareUN(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareUN(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
bool fplibCompareUN(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr);
|
||||
@@ -189,95 +231,188 @@ uint64_t fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibDiv(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibExpA(uint16_t op);
|
||||
template <>
|
||||
uint32_t fplibExpA(uint32_t op);
|
||||
template <>
|
||||
uint64_t fplibExpA(uint64_t op);
|
||||
template <>
|
||||
uint16_t fplibMax(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMaxNum(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMin(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMinNum(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMul(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMulAdd(uint16_t addend, uint16_t op1, uint16_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibMulX(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibNeg(uint16_t op);
|
||||
template <>
|
||||
uint32_t fplibNeg(uint32_t op);
|
||||
template <>
|
||||
uint64_t fplibNeg(uint64_t op);
|
||||
template <>
|
||||
uint16_t fplibRSqrtEstimate(uint16_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr);
|
||||
template<>
|
||||
uint64_t fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibRSqrtStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibRecipEstimate(uint16_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRecipEstimate(uint32_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibRecipEstimate(uint64_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibRecipStepFused(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibRecpX(uint16_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRecpX(uint32_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibRecpX(uint64_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibRoundInt(uint16_t op, FPRounding rounding, bool exact,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibRoundInt(uint32_t op, FPRounding rounding, bool exact,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibRoundInt(uint64_t op, FPRounding rounding, bool exact,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibScale(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibScale(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibScale(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibSqrt(uint16_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibSqrt(uint32_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibSqrt(uint64_t op, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibSub(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibTrigMulAdd(uint8_t coeff_index, uint16_t op1, uint16_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibTrigMulAdd(uint8_t coeff_index, uint32_t op1, uint32_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibTrigMulAdd(uint8_t coeff_index, uint64_t op1, uint64_t op2,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibTrigSMul(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibTrigSMul(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibTrigSMul(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibTrigSSel(uint16_t op1, uint16_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibTrigSSel(uint32_t op1, uint32_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibTrigSSel(uint64_t op1, uint64_t op2, FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibFPToFixed(uint16_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint32_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint64_t fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding,
|
||||
FPSCR &fpscr);
|
||||
template <>
|
||||
uint16_t fplibInfinity(int sgn);
|
||||
template <>
|
||||
uint32_t fplibInfinity(int sgn);
|
||||
template <>
|
||||
uint64_t fplibInfinity(int sgn);
|
||||
template <>
|
||||
uint16_t fplibDefaultNaN();
|
||||
template <>
|
||||
uint32_t fplibDefaultNaN();
|
||||
template <>
|
||||
uint64_t fplibDefaultNaN();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -413,6 +413,7 @@ namespace ArmISA
|
||||
Bitfield<12> ixe;
|
||||
Bitfield<15> ide;
|
||||
Bitfield<18, 16> len;
|
||||
Bitfield<19> fz16;
|
||||
Bitfield<21, 20> stride;
|
||||
Bitfield<23, 22> rMode;
|
||||
Bitfield<24> fz;
|
||||
|
||||
Reference in New Issue
Block a user