/**
 * Compute the full 128-bit product of two unsigned 64-bit integers.
 *
 * Uses the compiler's native 128-bit integer type when it is available
 * and otherwise falls back to schoolbook multiplication on 32-bit halves
 * (based on https://stackoverflow.com/a/28904636).
 *
 * The helper reads no instance state, hence it is static; unqualified
 * calls from member code resolve exactly as before.
 *
 * @param ra First multiplicand.
 * @param rb Second multiplicand.
 * @return Tuple of (low 64 bits, high 64 bits) of the product.
 */
static inline std::tuple<uint64_t, uint64_t>
multiply(uint64_t ra, uint64_t rb)
{
    uint64_t plo, phi;
#if defined(__SIZEOF_INT128__)
    const __uint128_t prod = static_cast<__uint128_t>(ra) * rb;
    plo = static_cast<uint64_t>(prod);
    phi = static_cast<uint64_t>(prod >> 64);
#else
    const uint64_t mask32 = 0xffffffffULL;
    const uint64_t ralo = ra & mask32, rahi = ra >> 32;
    const uint64_t rblo = rb & mask32, rbhi = rb >> 32;

    // Four 32x32 -> 64 bit partial products.
    const uint64_t pp0 = ralo * rblo;
    const uint64_t pp1 = rahi * rblo;
    const uint64_t pp2 = ralo * rbhi;
    const uint64_t pp3 = rahi * rbhi;

    // Middle column. Every term is kept 64 bits wide so that the sum of
    // the two 32-bit low halves cannot wrap before the carry into the
    // high word has been extracted.
    const uint64_t c = (pp1 & mask32) + (pp2 & mask32) + (pp0 >> 32);

    phi = pp3 + (pp2 >> 32) + (pp1 >> 32) + (c >> 32);
    plo = (c << 32) | (pp0 & mask32);
#endif
    return std::make_tuple(plo, phi);
}

/**
 * Compute the full 128-bit product of two signed 64-bit integers.
 *
 * @param ra First multiplicand.
 * @param rb Second multiplicand.
 * @return Tuple of (low 64 bits, high 64 bits) of the product; the high
 *         half is signed and carries the sign of the result.
 */
static inline std::tuple<uint64_t, int64_t>
multiply(int64_t ra, int64_t rb)
{
    uint64_t plo, phi;
#if defined(__SIZEOF_INT128__)
    const __int128_t prod = static_cast<__int128_t>(ra) * rb;
    // Shift the unsigned bit pattern so that the result does not rely on
    // how the compiler right-shifts a negative value.
    const __uint128_t bits = static_cast<__uint128_t>(prod);
    plo = static_cast<uint64_t>(bits);
    phi = static_cast<uint64_t>(bits >> 64);
#else
    std::tie(plo, phi) = multiply(static_cast<uint64_t>(ra),
                                  static_cast<uint64_t>(rb));
    // Reinterpreting a negative operand as unsigned adds 2^64 to it, which
    // adds the other operand to the high half of the product. Undo that
    // for each negative operand (arithmetic is modulo 2^64).
    if (rb < 0) phi -= static_cast<uint64_t>(ra);
    if (ra < 0) phi -= static_cast<uint64_t>(rb);
#endif
    return std::make_tuple(plo, static_cast<int64_t>(phi));
}
format IntArithCheckRcOp {
    // Multiply Low Doubleword (mulld[o][.]): Rt receives the low 64 bits
    // of the signed product Ra * Rb, and setOV is raised when the product
    // does not fit in 64 bits.
    // NOTE(review): the trailing `true` presumably enables the OE
    // (overflow) variant of the format -- confirm against the format
    // definition.
    233: mulld({{
        int64_t src1 = Ra_sd;
        int64_t src2 = Rb_sd;
        uint64_t res;
        // Take the low half from the 128-bit helper; a plain
        // `src1 * src2` would be signed-overflow undefined behaviour.
        std::tie(res, std::ignore) = multiply(src1, src2);
        if (src1 == -1) {
            // -1 * src2 only overflows for the most negative src2.
            // Dividing back would evaluate INT64_MIN / -1, which is
            // undefined and traps on x86 hosts, so test the operand's
            // bit pattern instead.
            if ((uint64_t)src2 == (1ULL << 63)) {
                setOV = true;
            }
        } else if (src1 != 0 && (int64_t)res / src1 != src2) {
            // The truncated product no longer divides back to src2.
            setOV = true;
        }
        Rt = res;
    }}, true);

    // Multiply Low Word: 32-bit signed operands, 64-bit product; setOV
    // is raised when the product is not representable in 32 bits.
    235: mullw({{
        int64_t res = (int64_t)Ra_sw * Rb_sw;
        if (res != (int32_t)res) {
            setOV = true;
        }
        Rt = res;
    }}, true);
}