arch-riscv: Refactor the RISC-V multiplication utility (#780)

1. Add double_width specializations for int64_t and uint64_t
2. Use the wider type to compute the upper half of the multiplication result

Change-Id: Id6cfa6f274c65592b2b3e2b70c00f82954b41f1a
This commit is contained in:
Yu-Cheng Chang
2024-01-19 04:40:11 +08:00
committed by GitHub
parent 511729ab76
commit f56459470a
2 changed files with 39 additions and 69 deletions

View File

@@ -1467,9 +1467,9 @@ decode QUADRANT default Unknown::unknown() {
}});
0x1: mulh({{
if (machInst.rv_type == RV32) {
Rd_sd = mulh_32(Rs1_sd, Rs2_sd);
Rd_sd = mulh<int32_t>(Rs1_sd, Rs2_sd);
} else {
Rd_sd = mulh_64(Rs1_sd, Rs2_sd);
Rd_sd = mulh<int64_t>(Rs1_sd, Rs2_sd);
}
}}, IntMultOp);
0x5: clmul({{
@@ -1506,9 +1506,9 @@ decode QUADRANT default Unknown::unknown() {
}});
0x1: mulhsu({{
if (machInst.rv_type == RV32) {
Rd_sd = mulhsu_32(Rs1_sd, Rs2);
Rd_sd = mulhsu<int32_t>(Rs1_sd, Rs2);
} else {
Rd_sd = mulhsu_64(Rs1_sd, Rs2);
Rd_sd = mulhsu<int64_t>(Rs1_sd, Rs2);
}
}}, IntMultOp);
0x5: clmulr({{
@@ -1539,9 +1539,9 @@ decode QUADRANT default Unknown::unknown() {
}});
0x1: mulhu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)mulhu_32(Rs1, Rs2);
Rd = (int32_t)mulhu<uint32_t>(Rs1, Rs2);
} else {
Rd = mulhu_64(Rs1, Rs2);
Rd = mulhu<uint64_t>(Rs1, Rs2);
}
}}, IntMultOp);
0x5: clmulh({{
@@ -3292,7 +3292,7 @@ decode QUADRANT default Unknown::unknown() {
Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Vs1_vu[i])
>> sew;
} else {
Vd_vu[i] = mulhu_64(Vs2_vu[i], Vs1_vu[i]);
Vd_vu[i] = mulhu<uint64_t>(Vs2_vu[i], Vs1_vu[i]);
}
}}, OPMVV, VectorIntegerArithOp);
0x25: vmul_vv({{
@@ -3304,7 +3304,7 @@ decode QUADRANT default Unknown::unknown() {
(uint64_t)Vs1_vu[i])
>> sew;
} else {
Vd_vi[i] = mulhsu_64(Vs2_vi[i], Vs1_vu[i]);
Vd_vi[i] = mulhsu<int64_t>(Vs2_vi[i], Vs1_vu[i]);
}
}}, OPMVV, VectorIntegerArithOp);
0x27: vmulh_vv({{
@@ -3312,7 +3312,7 @@ decode QUADRANT default Unknown::unknown() {
Vd_vi[i] = ((int64_t)Vs2_vi[i] * Vs1_vi[i])
>> sew;
} else {
Vd_vi[i] = mulh_64(Vs2_vi[i], Vs1_vi[i]);
Vd_vi[i] = mulh<int64_t>(Vs2_vi[i], Vs1_vi[i]);
}
}}, OPMVV, VectorIntegerArithOp);
0x29: vmadd_vv({{
@@ -4384,7 +4384,7 @@ decode QUADRANT default Unknown::unknown() {
Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Rs1_vu)
>> sew;
else
Vd_vu[i] = mulhu_64(Vs2_vu[i], Rs1_vu);
Vd_vu[i] = mulhu<uint64_t>(Vs2_vu[i], Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
0x25: vmul_vx({{
Vd_vi[i] = Vs2_vi[i] * Rs1_vi;
@@ -4395,14 +4395,14 @@ decode QUADRANT default Unknown::unknown() {
(uint64_t)Rs1_vu)
>> sew;
else
Vd_vi[i] = mulhsu_64(Vs2_vi[i], Rs1_vu);
Vd_vi[i] = mulhsu<int64_t>(Vs2_vi[i], Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
0x27: vmulh_vx({{
if (sew < 64)
Vd_vi[i] = ((int64_t)Vs2_vi[i] * Rs1_vi)
>> sew;
else
Vd_vi[i] = mulh_64(Vs2_vi[i], Rs1_vi);
Vd_vi[i] = mulh<int64_t>(Vs2_vi[i], Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
0x29: vmadd_vx({{
Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i];

View File

@@ -65,6 +65,21 @@ namespace gem5
namespace RiscvISA
{
// double_width<T>::type names the type that is twice as wide as T.
// Only the specializations listed below are provided; the primary template
// is declared without a definition here, so any other T has no ::type.
template<typename Type> struct double_width;
// Unsigned integers: 8 -> 16, 16 -> 32, 32 -> 64, 64 -> 128 bits.
template<> struct double_width<uint8_t> { using type = uint16_t;};
template<> struct double_width<uint16_t> { using type = uint32_t;};
template<> struct double_width<uint32_t> { using type = uint64_t;};
// __uint128_t is a compiler-provided (non-standard) 128-bit integer type.
template<> struct double_width<uint64_t> { using type = __uint128_t;};
// Signed integers: 8 -> 16, 16 -> 32, 32 -> 64, 64 -> 128 bits.
template<> struct double_width<int8_t> { using type = int16_t; };
template<> struct double_width<int16_t> { using type = int32_t; };
template<> struct double_width<int32_t> { using type = int64_t; };
// __int128_t is a compiler-provided (non-standard) 128-bit integer type.
template<> struct double_width<int64_t> { using type = __int128_t; };
// Floating point: float32_t widens to float64_t.
template<> struct double_width<float32_t> { using type = float64_t;};
// double_widthf<T>::type maps a 32-bit integer type to float64_t.
// NOTE(review): presumably used where an integer source is widened to a
// floating-point destination -- confirm against the callers.
template<typename Type> struct double_widthf;
template<> struct double_widthf<uint32_t> { using type = float64_t;};
template<> struct double_widthf<int32_t> { using type = float64_t;};
template<typename T> inline bool
isquietnan(T val)
{
@@ -146,57 +161,25 @@ registerName(RegId reg)
}
}
inline uint32_t
mulhu_32(uint32_t rs1, uint32_t rs2)
template <typename T> inline std::make_unsigned_t<T>
mulhu(std::make_unsigned_t<T> rs1, std::make_unsigned_t<T> rs2)
{
return ((uint64_t)rs1 * rs2) >> 32;
using WideT = typename double_width<std::make_unsigned_t<T>>::type;
return ((WideT)rs1 * rs2) >> (sizeof(T) * 8);
}
inline uint64_t
mulhu_64(uint64_t rs1, uint64_t rs2)
template <typename T> inline std::make_signed_t<T>
mulh(std::make_signed_t<T> rs1, std::make_signed_t<T> rs2)
{
uint64_t rs1_lo = (uint32_t)rs1;
uint64_t rs1_hi = rs1 >> 32;
uint64_t rs2_lo = (uint32_t)rs2;
uint64_t rs2_hi = rs2 >> 32;
uint64_t hi = rs1_hi * rs2_hi;
uint64_t mid1 = rs1_hi * rs2_lo;
uint64_t mid2 = rs1_lo * rs2_hi;
uint64_t lo = rs1_lo * rs2_lo;
uint64_t carry = ((uint64_t)(uint32_t)mid1
+ (uint64_t)(uint32_t)mid2
+ (lo >> 32)) >> 32;
return hi + (mid1 >> 32) + (mid2 >> 32) + carry;
using WideT = typename double_width<std::make_signed_t<T>>::type;
return ((WideT)rs1 * rs2) >> (sizeof(T) * 8);
}
inline int32_t
mulh_32(int32_t rs1, int32_t rs2)
template <typename T> inline std::make_signed_t<T>
mulhsu(std::make_signed_t<T> rs1, std::make_unsigned_t<T> rs2)
{
return ((int64_t)rs1 * rs2) >> 32;
}
inline int64_t
mulh_64(int64_t rs1, int64_t rs2)
{
bool negate = (rs1 < 0) != (rs2 < 0);
uint64_t res = mulhu_64(std::abs(rs1), std::abs(rs2));
return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
}
inline int32_t
mulhsu_32(int32_t rs1, uint32_t rs2)
{
return ((int64_t)rs1 * rs2) >> 32;
}
inline int64_t
mulhsu_64(int64_t rs1, uint64_t rs2)
{
bool negate = rs1 < 0;
uint64_t res = mulhu_64(std::abs(rs1), rs2);
return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
using WideT = typename double_width<std::make_signed_t<T>>::type;
return ((WideT)rs1 * rs2) >> (sizeof(T) * 8);
}
template<typename T> inline T
@@ -323,19 +306,6 @@ elem_mask(const T* vs, const int index)
return (vs[idx] >> pos) & 1;
}
// double_width<T>::type names the type that is twice as wide as T.
// Only the specializations listed below are provided; the primary template
// is declared without a definition here, so any other T has no ::type.
template<typename Type> struct double_width;
// Unsigned integers: 8 -> 16, 16 -> 32, 32 -> 64 bits.
template<> struct double_width<uint8_t> { using type = uint16_t;};
template<> struct double_width<uint16_t> { using type = uint32_t;};
template<> struct double_width<uint32_t> { using type = uint64_t;};
// Signed integers: 8 -> 16, 16 -> 32, 32 -> 64 bits.
template<> struct double_width<int8_t> { using type = int16_t; };
template<> struct double_width<int16_t> { using type = int32_t; };
template<> struct double_width<int32_t> { using type = int64_t; };
// Floating point: float32_t widens to float64_t.
template<> struct double_width<float32_t> { using type = float64_t;};
// double_widthf<T>::type maps a 32-bit integer type to float64_t.
// NOTE(review): presumably used where an integer source is widened to a
// floating-point destination -- confirm against the callers.
template<typename Type> struct double_widthf;
template<> struct double_widthf<uint32_t> { using type = float64_t;};
template<> struct double_widthf<int32_t> { using type = float64_t;};
template<typename FloatType, typename IntType = decltype(FloatType::v)> auto
ftype(IntType a) -> FloatType
{