arch-riscv: Simplify and merge the RV32/RV64 RVM instructions

This change moves the implementation details to utility.hh and merges
the RV32 and RV64 versions into one.

Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70597
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
This commit is contained in:
Roger Chang
2023-05-12 23:22:14 +08:00
parent 08644a7670
commit 9c0f337d78
2 changed files with 149 additions and 183 deletions

View File

@@ -1084,34 +1084,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: sll({{
Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
}});
0x1: decode RVTYPE {
0x0: rv32_mulh({{
Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32;
}}, IntMultOp);
0x1: mulh({{
bool negate = (Rs1_sd < 0) != (Rs2_sd < 0);
uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd);
uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32;
uint64_t hi = Rs1_hi*Rs2_hi;
uint64_t mid1 = Rs1_hi*Rs2_lo;
uint64_t mid2 = Rs1_lo*Rs2_hi;
uint64_t lo = Rs2_lo*Rs1_lo;
uint64_t carry = ((uint64_t)(uint32_t)mid1
+ (uint64_t)(uint32_t)mid2
+ (lo >> 32)) >> 32;
uint64_t res = hi +
(mid1 >> 32) +
(mid2 >> 32) +
carry;
Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0)
: res;
}}, IntMultOp);
}
0x1: mulh({{
if (machInst.rv_type == RV32) {
Rd_sd = mulh_32(Rs1_sd, Rs2_sd);
} else {
Rd_sd = mulh_64(Rs1_sd, Rs2_sd);
}
}}, IntMultOp);
0x5: clmul({{
uint64_t result = 0;
for (int i = 0; i < rvSelect(32, 64); i++) {
@@ -1144,32 +1123,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: slt({{
Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
}});
0x1: decode RVTYPE {
0x0: rv32_mulhsu({{
Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32;
}}, IntMultOp);
0x1: mulhsu({{
bool negate = Rs1_sd < 0;
uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
uint64_t Rs2_lo = (uint32_t)Rs2;
uint64_t Rs2_hi = Rs2 >> 32;
uint64_t hi = Rs1_hi*Rs2_hi;
uint64_t mid1 = Rs1_hi*Rs2_lo;
uint64_t mid2 = Rs1_lo*Rs2_hi;
uint64_t lo = Rs1_lo*Rs2_lo;
uint64_t carry = ((uint64_t)(uint32_t)mid1
+ (uint64_t)(uint32_t)mid2
+ (lo >> 32)) >> 32;
uint64_t res = hi +
(mid1 >> 32) +
(mid2 >> 32) +
carry;
Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res;
}}, IntMultOp);
}
0x1: mulhsu({{
if (machInst.rv_type == RV32) {
Rd_sd = mulhsu_32(Rs1_sd, Rs2);
} else {
Rd_sd = mulhsu_64(Rs1_sd, Rs2);
}
}}, IntMultOp);
0x5: clmulr({{
uint64_t result = 0;
uint64_t xlen = rvSelect(32, 64);
@@ -1197,27 +1157,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: sltu({{
Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
}});
0x1: decode RVTYPE {
0x0: rv32_mulhu({{
Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32;
}}, IntMultOp);
0x1: mulhu({{
uint64_t Rs1_lo = (uint32_t)Rs1;
uint64_t Rs1_hi = Rs1 >> 32;
uint64_t Rs2_lo = (uint32_t)Rs2;
uint64_t Rs2_hi = Rs2 >> 32;
uint64_t hi = Rs1_hi*Rs2_hi;
uint64_t mid1 = Rs1_hi*Rs2_lo;
uint64_t mid2 = Rs1_lo*Rs2_hi;
uint64_t lo = Rs1_lo*Rs2_lo;
uint64_t carry = ((uint64_t)(uint32_t)mid1
+ (uint64_t)(uint32_t)mid2
+ (lo >> 32)) >> 32;
Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry;
}}, IntMultOp);
}
0x1: mulhu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)mulhu_32(Rs1, Rs2);
} else {
Rd = mulhu_64(Rs1, Rs2);
}
}}, IntMultOp);
0x5: clmulh({{
uint64_t result = 0;
uint64_t xlen = rvSelect(32, 64);
@@ -1235,30 +1181,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: xor({{
Rd = rvSext(Rs1 ^ Rs2);
}});
0x1: decode RVTYPE {
0x0: rv32_div({{
constexpr int32_t kRsMin = \
std::numeric_limits<int32_t>::min();
if (Rs2_sw == 0) {
Rd_sw = -1;
} else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
Rd_sw = kRsMin;
} else {
Rd_sw = Rs1_sw/Rs2_sw;
}
}}, IntDivOp);
0x1: div({{
constexpr int64_t kRsMin = \
std::numeric_limits<int64_t>::min();
if (Rs2_sd == 0) {
Rd_sd = -1;
} else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
Rd_sd = kRsMin;
} else {
Rd_sd = Rs1_sd/Rs2_sd;
}
}}, IntDivOp);
}
0x1: div({{
if (machInst.rv_type == RV32) {
Rd_sd = div<int32_t>(Rs1, Rs2);
} else {
Rd_sd = div<int64_t>(Rs1, Rs2);
}
}}, IntDivOp);
0x4: pack({{
int xlen = rvSelect(32, 64);
Rd = rvSext(
@@ -1289,22 +1218,13 @@ decode QUADRANT default Unknown::unknown() {
Rd = rvSext(rvZext(Rs1) >>
rvSelect(Rs2<4:0>, Rs2<5:0>));
}});
0x1: decode RVTYPE {
0x0: rv32_divu({{
if (Rs2_uw == 0) {
Rd_sw = std::numeric_limits<uint32_t>::max();
} else {
Rd_sw = Rs1_uw/Rs2_uw;
}
}}, IntDivOp);
0x1: divu({{
if (Rs2 == 0) {
Rd = std::numeric_limits<uint64_t>::max();
} else {
Rd = Rs1/Rs2;
}
}}, IntDivOp);
}
0x1: divu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
} else {
Rd = divu<uint64_t>(Rs1, Rs2);
}
}}, IntDivOp);
0x20: sra({{
Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
}});
@@ -1327,30 +1247,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: or({{
Rd = rvSext(Rs1 | Rs2);
}});
0x1: decode RVTYPE {
0x0: rv32_rem({{
constexpr int32_t kRsMin = \
std::numeric_limits<int32_t>::min();
if (Rs2_sw == 0) {
Rd_sw = Rs1_sw;
} else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
Rd_sw = 0;
} else {
Rd_sw = Rs1_sw%Rs2_sw;
}
}}, IntDivOp);
0x1: rem({{
constexpr int64_t kRsMin = \
std::numeric_limits<int64_t>::min();
if (Rs2_sd == 0) {
Rd = Rs1_sd;
} else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
Rd = 0;
} else {
Rd = Rs1_sd%Rs2_sd;
}
}}, IntDivOp);
}
0x1: rem({{
if (machInst.rv_type == RV32) {
Rd_sd = rem<int32_t>(Rs1, Rs2);
} else {
Rd_sd = rem<int64_t>(Rs1, Rs2);
}
}}, IntDivOp);
0x5: max({{
Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
}});
@@ -1365,22 +1268,13 @@ decode QUADRANT default Unknown::unknown() {
0x0: and({{
Rd = rvSext(Rs1 & Rs2);
}});
0x1: decode RVTYPE {
0x0: rv32_remu({{
if (Rs2_uw == 0) {
Rd_sw = Rs1_uw;
} else {
Rd_sw = Rs1_uw%Rs2_uw;
}
}}, IntDivOp);
0x1: remu({{
if (Rs2 == 0) {
Rd = Rs1;
} else {
Rd = Rs1%Rs2;
}
}}, IntDivOp);
}
0x1: remu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
} else {
Rd = remu<uint64_t>(Rs1, Rs2);
}
}}, IntDivOp);
0x4: packh({{
// It doesn't need to sign ext as MSB is always 0
Rd = (Rs2_ub << 8) | Rs1_ub;
@@ -1432,15 +1326,7 @@ decode QUADRANT default Unknown::unknown() {
}
0x4: decode FUNCT7 {
0x1: divw({{
constexpr int32_t kRsMin = \
std::numeric_limits<int32_t>::min();
if (Rs2_sw == 0) {
Rd_sd = -1;
} else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
Rd_sd = kRsMin;
} else {
Rd_sd = Rs1_sw/Rs2_sw;
}
Rd_sd = div<int32_t>(Rs1, Rs2);
}}, IntDivOp);
0x4: packw({{
Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
@@ -1454,11 +1340,7 @@ decode QUADRANT default Unknown::unknown() {
Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
}});
0x1: divuw({{
if (Rs2_uw == 0) {
Rd_sd = std::numeric_limits<uint64_t>::max();
} else {
Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
}
Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
}}, IntDivOp);
0x20: sraw({{
Rd_sd = Rs1_sw >> Rs2<4:0>;
@@ -1470,26 +1352,14 @@ decode QUADRANT default Unknown::unknown() {
}
0x6: decode FUNCT7 {
0x1: remw({{
constexpr int32_t kRsMin = \
std::numeric_limits<int32_t>::min();
if (Rs2_sw == 0) {
Rd_sd = Rs1_sw;
} else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
Rd_sd = 0;
} else {
Rd_sd = Rs1_sw%Rs2_sw;
}
Rd_sd = rem<int32_t>(Rs1, Rs2);
}}, IntDivOp);
0x10: sh3add_uw({{
Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
}});
}
0x7: remuw({{
if (Rs2_uw == 0) {
Rd_sd = (int32_t)Rs1_uw;
} else {
Rd_sd = (int32_t)(Rs1_uw%Rs2_uw);
}
Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
}}, IntDivOp);
}
}

View File

@@ -55,6 +55,7 @@
#include "cpu/reg_class.hh"
#include "cpu/static_inst.hh"
#include "cpu/thread_context.hh"
#include "enums/RiscvType.hh"
#include "rvk.hh"
namespace gem5
@@ -137,6 +138,101 @@ registerName(RegId reg)
}
}
/**
 * Unsigned 32x32 high multiply.
 *
 * @param rs1 First unsigned 32-bit operand.
 * @param rs2 Second unsigned 32-bit operand.
 * @return The upper 32 bits of the 64-bit product rs1 * rs2.
 */
inline uint32_t
mulhu_32(uint32_t rs1, uint32_t rs2)
{
    // Widen first so the full 64-bit product is formed without truncation.
    const uint64_t product = static_cast<uint64_t>(rs1) * rs2;
    return product >> 32;
}
/**
 * Unsigned 64x64 high multiply built from four 32x32 partial products.
 *
 * @param rs1 First unsigned 64-bit operand.
 * @param rs2 Second unsigned 64-bit operand.
 * @return The upper 64 bits of the 128-bit product rs1 * rs2.
 */
inline uint64_t
mulhu_64(uint64_t rs1, uint64_t rs2)
{
    constexpr uint64_t kLow32 = 0xFFFFFFFFULL;

    // Split each operand into its 32-bit halves.
    const uint64_t a_lo = rs1 & kLow32;
    const uint64_t a_hi = rs1 >> 32;
    const uint64_t b_lo = rs2 & kLow32;
    const uint64_t b_hi = rs2 >> 32;

    // Each partial product fits in 64 bits.
    const uint64_t lo_lo = a_lo * b_lo;
    const uint64_t hi_lo = a_hi * b_lo;
    const uint64_t lo_hi = a_lo * b_hi;
    const uint64_t hi_hi = a_hi * b_hi;

    // Sum everything that lands in bits [32, 64) of the full product; the
    // overflow of that sum is the carry into the upper 64 bits.
    const uint64_t middle =
        (lo_lo >> 32) + (hi_lo & kLow32) + (lo_hi & kLow32);

    return hi_hi + (hi_lo >> 32) + (lo_hi >> 32) + (middle >> 32);
}
/**
 * Signed 32x32 high multiply.
 *
 * @param rs1 First signed 32-bit operand.
 * @param rs2 Second signed 32-bit operand.
 * @return The upper 32 bits of the 64-bit signed product rs1 * rs2.
 */
inline int32_t
mulh_32(int32_t rs1, int32_t rs2)
{
    // The product of two 32-bit values always fits in 64 bits.
    const int64_t product = static_cast<int64_t>(rs1) * rs2;
    return product >> 32;
}
/**
 * Signed 64x64 high multiply.
 *
 * The operands are reduced to their magnitudes, multiplied with mulhu_64(),
 * and the sign is re-applied to the upper half of the 128-bit result.
 *
 * Magnitudes and the low-half product are computed in unsigned arithmetic:
 * negating or multiplying the signed values directly overflows for inputs
 * such as the most negative int64_t, which is undefined behaviour.
 *
 * @param rs1 First signed 64-bit operand.
 * @param rs2 Second signed 64-bit operand.
 * @return The upper 64 bits of the 128-bit signed product rs1 * rs2.
 */
inline int64_t
mulh_64(int64_t rs1, int64_t rs2)
{
    const uint64_t bits1 = static_cast<uint64_t>(rs1);
    const uint64_t bits2 = static_cast<uint64_t>(rs2);

    // Two's-complement negation modulo 2^64 yields the magnitude, including
    // 2^63 for the most negative input.
    const uint64_t mag1 = rs1 < 0 ? 0 - bits1 : bits1;
    const uint64_t mag2 = rs2 < 0 ? 0 - bits2 : bits2;

    const bool negate = (rs1 < 0) != (rs2 < 0);
    const uint64_t res = mulhu_64(mag1, mag2);

    // Negating a 128-bit value inverts the upper half and adds the carry out
    // of the lower half, which occurs only when the lower half is zero.
    const uint64_t low = mag1 * mag2;
    const uint64_t upper = negate ? ~res + (low == 0 ? 1 : 0) : res;
    return static_cast<int64_t>(upper);
}
/**
 * Signed-by-unsigned 32x32 high multiply.
 *
 * @param rs1 Signed 32-bit operand.
 * @param rs2 Unsigned 32-bit operand.
 * @return The upper 32 bits of the 64-bit product rs1 * rs2.
 */
inline int32_t
mulhsu_32(int32_t rs1, uint32_t rs2)
{
    // Both operands are widened to int64_t; the product always fits.
    const int64_t lhs = rs1;
    const int64_t rhs = rs2;
    const int64_t product = lhs * rhs;
    return product >> 32;
}
/**
 * Signed-by-unsigned 64x64 high multiply.
 *
 * The signed operand is reduced to its magnitude, multiplied with
 * mulhu_64(), and the sign is re-applied to the upper half of the 128-bit
 * result.
 *
 * The magnitude is computed in unsigned arithmetic because negating the
 * most negative int64_t as a signed value is undefined behaviour.
 *
 * @param rs1 Signed 64-bit operand.
 * @param rs2 Unsigned 64-bit operand.
 * @return The upper 64 bits of the 128-bit product rs1 * rs2.
 */
inline int64_t
mulhsu_64(int64_t rs1, uint64_t rs2)
{
    const bool negate = rs1 < 0;
    const uint64_t bits1 = static_cast<uint64_t>(rs1);

    // Two's-complement negation modulo 2^64 yields the magnitude, including
    // 2^63 for the most negative input.
    const uint64_t mag1 = negate ? 0 - bits1 : bits1;

    const uint64_t res = mulhu_64(mag1, rs2);

    // Negating a 128-bit value inverts the upper half and adds the carry out
    // of the lower half, which occurs only when the lower half is zero.
    const uint64_t low = mag1 * rs2;
    const uint64_t upper = negate ? ~res + (low == 0 ? 1 : 0) : res;
    return static_cast<int64_t>(upper);
}
/**
 * Signed division with explicit results for the two cases that plain C++
 * division cannot handle.
 *
 * @tparam T Integer type of the operands and the result.
 * @param rs1 Dividend.
 * @param rs2 Divisor.
 * @return -1 when rs2 is zero; the minimum value of T when rs1 is that
 *         minimum and rs2 is -1; otherwise rs1 / rs2.
 */
template<typename T> inline T
div(T rs1, T rs2)
{
    if (rs2 == 0) {
        return -1;
    }

    constexpr T lowest = std::numeric_limits<T>::min();
    const bool overflows = (rs1 == lowest) && (rs2 == -1);
    if (overflows) {
        // The true quotient is not representable in T.
        return lowest;
    }
    return rs1 / rs2;
}
/**
 * Unsigned division with an explicit result for a zero divisor.
 *
 * @tparam T Integer type of the operands and the result.
 * @param rs1 Dividend.
 * @param rs2 Divisor.
 * @return The maximum value of T when rs2 is zero; otherwise rs1 / rs2.
 */
template<typename T> inline T
divu(T rs1, T rs2)
{
    return (rs2 == 0) ? std::numeric_limits<T>::max() : rs1 / rs2;
}
/**
 * Signed remainder with explicit results for the two cases that plain C++
 * remainder cannot handle.
 *
 * @tparam T Integer type of the operands and the result.
 * @param rs1 Dividend.
 * @param rs2 Divisor.
 * @return rs1 when rs2 is zero; 0 when rs1 is the minimum value of T and
 *         rs2 is -1; otherwise rs1 % rs2.
 */
template<typename T> inline T
rem(T rs1, T rs2)
{
    if (rs2 == 0) {
        return rs1;
    }

    constexpr T lowest = std::numeric_limits<T>::min();
    const bool overflows = (rs1 == lowest) && (rs2 == -1);
    if (overflows) {
        // The matching quotient is not representable in T.
        return 0;
    }
    return rs1 % rs2;
}
/**
 * Unsigned remainder with an explicit result for a zero divisor.
 *
 * @tparam T Integer type of the operands and the result.
 * @param rs1 Dividend.
 * @param rs2 Divisor.
 * @return rs1 when rs2 is zero; otherwise rs1 % rs2.
 */
template<typename T> inline T
remu(T rs1, T rs2)
{
    if (rs2 == 0) {
        return rs1;
    }
    return rs1 % rs2;
}
} // namespace RiscvISA
} // namespace gem5