From af98e173641e213564c964e04d1abb75f4201659 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.ibm.com>
Date: Sat, 6 Feb 2021 17:17:31 +0530
Subject: [PATCH] arch-power: Fix arithmetic instructions

The latest Power ISA introduces two new bits that record
carry and overflow out of bit 31 of the result, namely
CA32 and OV32 respectively, thereby changing the behaviour
of the add and subtract instructions that set them. Also,
now that 64-bit registers are being used, the nature of
the result, i.e. less than, greater than, or equal to zero,
must be set by a 64-bit signed comparison of the result
to zero. This fixes the following instructions:
  * Add Immediate (addi)
  * Add Immediate Shifted (addis)
  * Add (add[o][.])
  * Subtract From (subf[o][.])
  * Add Immediate Carrying (addic)
  * Add Immediate Carrying and Record (addic.)
  * Subtract From Immediate Carrying (subfic)
  * Add Carrying (addc[o][.])
  * Subtract From Carrying (subfc[o][.])
  * Add Extended (adde[o][.])
  * Subtract From Extended (subfe[o][.])
  * Add to Zero Extended (addze[o][.])
  * Subtract From Zero Extended (subfze[o][.])
  * Negate (neg[o][.])
  * Multiply Low Immediate (mulli)
  * Multiply Low Word (mullw[o][.])
  * Multiply High Word (mulhw[.])
  * Multiply High Word Unsigned (mulhwu[.])
  * Divide Word (divw[o][.])
  * Divide Word Unsigned (divwu[o][.])

Change-Id: I8c79f1dca8b19010ed7b734d7ec9bb598df428c3
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/40899
Reviewed-by: Boris Shingarov <shingarov@labware.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/power/isa/decoder.isa         | 30 +++++++++++++-------------
 src/arch/power/isa/formats/integer.isa | 18 ++++++++++++++--
 src/arch/power/regs/misc.hh            |  2 ++
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index e993a7bff5..cc3b9f4013 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -38,9 +38,8 @@ decode PO default Unknown::unknown() {
 
     format IntImmArithOp {
         7: mulli({{
-            int32_t src = Ra_sw;
-            int64_t prod = src * si;
-            Rt = (uint32_t)prod;
+            int64_t res = Ra_sd * si;
+            Rt = res;
         }});
 
         8: subfic({{
@@ -486,15 +485,17 @@ decode PO default Unknown::unknown() {
             }
 
             11: IntArithCheckRcOp::mulhwu({{
-                uint64_t prod = Ra_ud * Rb_ud;
-                Rt = prod >> 32;
+                uint64_t res = (uint64_t)Ra_uw * Rb_uw;
+                res = res >> 32;
+                Rt = res;
             }});
 
             40: IntSumOp::subf({{ ~Ra }}, {{ Rb }}, {{ 1 }});
 
             75: IntArithCheckRcOp::mulhw({{
-                int64_t prod = Ra_sd * Rb_sd;
-                Rt = prod >> 32;
+                uint64_t res = (int64_t)Ra_sw * Rb_sw;
+                res = res >> 32;
+                Rt = res;
             }});
 
             format IntSumOp {
@@ -508,19 +509,19 @@ decode PO default Unknown::unknown() {
             }
 
             235: IntArithCheckRcOp::mullw({{
-                int64_t prod = Ra_sd * Rb_sd;
-                Rt = prod;
-                if (prod != (int32_t)prod) {
+                int64_t res = (int64_t)Ra_sw * Rb_sw;
+                if (res != (int32_t)res) {
                     setOV = true;
                 }
+                Rt = res;
             }}, true);
 
             266: IntSumOp::add({{ Ra }}, {{ Rb }});
 
             format IntArithCheckRcOp {
                 459: divwu({{
-                    uint32_t src1 = Ra_sw;
-                    uint32_t src2 = Rb_sw;
+                    uint32_t src1 = Ra_uw;
+                    uint32_t src2 = Rb_uw;
                     if (src2 != 0) {
                         Rt = src1 / src2;
                     } else {
@@ -532,9 +533,8 @@ decode PO default Unknown::unknown() {
                 491: divw({{
                     int32_t src1 = Ra_sw;
                     int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
+                    if ((src1 != INT32_MIN || src2 != -1) && src2 != 0) {
+                        Rt = (uint32_t)(src1 / src2);
                     } else {
                         Rt = 0;
                         setOV = true;
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index b0840ce413..8583ba09a9 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -44,28 +44,42 @@ computeCR0Code = '''
 '''
 
 computeCACode = '''
-    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findCarry(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ca = 1;
     } else {
         xer.ca = 0;
     }
+
+    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ca32 = 1;
+    } else {
+        xer.ca32 = 0;
+    }
 '''
 
 computeOVCode = '''
-    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findOverflow(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ov = 1;
         xer.so = 1;
     } else {
         xer.ov = 0;
     }
+
+    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ov32 = 1;
+    } else {
+        xer.ov32 = 0;
+    }
 '''
 
 setOVCode = '''
     if (setOV) {
         xer.ov = 1;
+        xer.ov32 = 1;
         xer.so = 1;
     } else {
         xer.ov = 0;
+        xer.ov32 = 0;
     }
 '''
 
diff --git a/src/arch/power/regs/misc.hh b/src/arch/power/regs/misc.hh
index 6a998166e5..1665e280dd 100644
--- a/src/arch/power/regs/misc.hh
+++ b/src/arch/power/regs/misc.hh
@@ -56,6 +56,8 @@ BitUnion32(Xer)
     Bitfield<31> so;
     Bitfield<30> ov;
     Bitfield<29> ca;
+    Bitfield<19> ov32;
+    Bitfield<18> ca32;
 EndBitUnion(Xer)
 
 BitUnion32(Fpscr)