arch-arm: implement floating point aarch32 VCVTA family

These instructions round floating-point values to integers, and were added to AArch32 as an extension to ARMv7.

Change-Id: I62d1705badc95a4e8954a5ad62b2b6bc9e4ffe00
Reviewed-on: https://gem5-review.googlesource.com/c/16788
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
2019-02-18 18:06:45 +00:00
parent b48e4a90bf
commit 2c242d665f
2 changed files with 200 additions and 105 deletions
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
-// Copyright (c) 2010-2011, 2016-2018 ARM Limited
+// Copyright (c) 2010-2011, 2016-2019 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -2001,6 +2001,26 @@ let {{
    decodeShortFpTransfer(ExtMachInst machInst);
    '''
    decoder_output = '''
    // Build the destination register index from instruction bit 22 and
    // bits 15:12. The 4-bit field is always shifted left by one; bit 22
    // is placed at bit 5 when size == 3 and isInt is false, and at bit 0
    // in every other case.
    IntRegIndex decodeFpVd(ExtMachInst machInst, uint32_t size, bool isInt)
    {
        const bool wideFpLayout = (size == 3) && !isInt;
        if (!wideFpLayout) {
            // Bit 22 supplies the least significant bit of the index.
            return (IntRegIndex)(bits(machInst, 22) |
                                 (bits(machInst, 15, 12) << 1));
        }
        // Bit 22 supplies bit 5 of the index.
        return (IntRegIndex)((bits(machInst, 22) << 5) |
                             (bits(machInst, 15, 12) << 1));
    }
    // Build the source register index from instruction bit 5 and
    // bits 3:0. The 4-bit field is always shifted left by one; bit 5
    // is placed at bit 5 when size == 3 and isInt is false, and at bit 0
    // in every other case.
    IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt)
    {
        const bool wideFpLayout = (size == 3) && !isInt;
        if (!wideFpLayout) {
            // Bit 5 supplies the least significant bit of the index.
            return (IntRegIndex)(bits(machInst, 5) |
                                 (bits(machInst, 3, 0) << 1));
        }
        // Bit 5 keeps its position as bit 5 of the index.
        return (IntRegIndex)((bits(machInst, 5) << 5) |
                             (bits(machInst, 3, 0) << 1));
    }
    StaticInstPtr
    decodeShortFpTransfer(ExtMachInst machInst)
    {
@@ -2008,67 +2028,143 @@ let {{
        const uint32_t c = bits(machInst, 8);
        const uint32_t a = bits(machInst, 23, 21);
        const uint32_t b = bits(machInst, 6, 5);
        const uint32_t o1 = bits(machInst, 18);
        if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
            (machInst.thumb == 0 && machInst.condCode == 0xf)) {
            // Determine if this is backported aarch64 FP instruction
            const bool b31_b24 = bits(machInst, 31, 24) == 0xFE;
            const bool b23 = bits(machInst, 23);
-            const bool b21_b18 = bits(machInst, 21, 18) == 0xE;
+            const bool b21_b19 = bits(machInst, 21, 19) == 0x7;
            const bool b11_b9  = bits(machInst, 11, 9) == 0x5;
-            const bool sz = bits(machInst, 8);
+            const uint32_t size = bits(machInst, 9, 8);
-            const bool b7_b6   = bits(machInst, 7, 6) == 0x1;
+            const bool op3 = bits(machInst, 6);
            const bool b6 = bits(machInst, 6) == 0x0;
            const bool b4 = bits(machInst, 4) == 0x0;
-            if (b31_b24 && b23 && b21_b18 && b11_b9 && b7_b6 && b4) {
+            const uint32_t rm = bits(machInst, 17, 16);
-                  // VINT* Integer Rounding Instructon
+            IntRegIndex vd = decodeFpVd(machInst, size, false);
-                  const uint32_t rm = bits(machInst, 17, 16);
+            IntRegIndex vm = decodeFpVm(machInst, size, false);
-
+            IntRegIndex vdInt = decodeFpVd(machInst, size, true);
-                  if (sz) {
+            if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) {
-                      const IntRegIndex vd =
+                if (o1 == 0) {
-                          (IntRegIndex)((bits(machInst, 22) << 5) |
+                    // VINT* Integer Rounding Instruction
-                                        (bits(machInst, 15, 12) << 1));
+                    if (size == 3) {
-                      const IntRegIndex vm =
+                        switch(rm) {
-                          (IntRegIndex)((bits(machInst, 5) << 5) |
+                            case 0x0:
-                                        (bits(machInst, 3, 0) << 1));
+                            return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
-                      switch(rm) {
+                                                                true);
-                        case 0x0:
+                            case 0x1:
-                          return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
+                            return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
-                                                            true);
+                                                                true);
-                        case 0x1:
+                            case 0x2:
-                          return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
+                            return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
-                                                            true);
+                                                                true);
-                        case 0x2:
+                            case 0x3:
-                          return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
+                            return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
-                                                            true);
+                                                                true);
-                        case 0x3:
+                            default: return new Unknown(machInst);
-                          return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
+                        }
-                                                            true);
+                    } else {
-                        default: return new Unknown(machInst);
+                        switch(rm) {
-                      }
+                            case 0x0:
-                  } else {
+                            return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
-                      const IntRegIndex vd =
+                                                                false);
-                          (IntRegIndex)(bits(machInst, 22) |
+                            case 0x1:
-                                       (bits(machInst, 15, 12) << 1));
+                            return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
-                      const IntRegIndex vm =
+                                                                false);
-                          (IntRegIndex)(bits(machInst, 5) |
+                            case 0x2:
-                                        (bits(machInst, 3, 0) << 1));
+                            return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
-                      switch(rm) {
+                                                                false);
-                        case 0x0:
+                            case 0x3:
-                          return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
+                            return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
-                                                            false);
+                                                                false);
-                        case 0x1:
+                            default: return new Unknown(machInst);
-                          return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
+                        }
-                                                            false);
+                    }
-                        case 0x2:
+                } else {
-                          return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
+                    const bool op = bits(machInst, 7);
-                                                            false);
+                    switch(rm) {
-                        case 0x3:
+                      case 0x0:
-                          return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
+                        switch(size) {
-                                                            false);
+                          case 0x0:
-                        default: return new Unknown(machInst);
+                            return new Unknown(machInst);
-                      }
+                          case 0x1:
-                  }
+                            return new FailUnimplemented(
-            } else if (b31_b24 && !b23 && b11_b9 && b6 && b4){
+                                "vcvta.u32.f16", machInst);
                          case 0x2:
                            if (op) {
                                return new VcvtaFpSIntS(machInst, vdInt, vm);
                            } else {
                                return new VcvtaFpUIntS(machInst, vdInt, vm);
                            }
                          case 0x3:
                            if (op) {
                                return new VcvtaFpSIntD(machInst, vdInt, vm);
                            } else {
                                return new VcvtaFpUIntD(machInst, vdInt, vm);
                            }
                        }
                      case 0x1:
                        switch(size) {
                          case 0x0:
                            return new Unknown(machInst);
                          case 0x1:
                            return new FailUnimplemented(
                                "vcvtn.u32.f16", machInst);
                          case 0x2:
                            if (op) {
                                return new VcvtnFpSIntS(machInst, vdInt, vm);
                            } else {
                                return new VcvtnFpUIntS(machInst, vdInt, vm);
                            }
                          case 0x3:
                            if (op) {
                                return new VcvtnFpSIntD(machInst, vdInt, vm);
                            } else {
                                return new VcvtnFpUIntD(machInst, vdInt, vm);
                            }
                        }
                      case 0x2:
                        switch(size) {
                          case 0x0:
                            return new Unknown(machInst);
                          case 0x1:
                            return new FailUnimplemented(
                                "vcvtp.u32.f16", machInst);
                          case 0x2:
                            if (op) {
                                return new VcvtpFpSIntS(machInst, vdInt, vm);
                            } else {
                                return new VcvtpFpUIntS(machInst, vdInt, vm);
                            }
                          case 0x3:
                            if (op) {
                                return new VcvtpFpSIntD(machInst, vdInt, vm);
                            } else {
                                return new VcvtpFpUIntD(machInst, vdInt, vm);
                            }
                        }
                      case 0x3:
                        switch(size) {
                          case 0x0:
                            return new Unknown(machInst);
                          case 0x1:
                            return new FailUnimplemented(
                                "vcvtm.u32.f16", machInst);
                          case 0x2:
                            if (op) {
                                return new VcvtmFpSIntS(machInst, vdInt, vm);
                            } else {
                                return new VcvtmFpUIntS(machInst, vdInt, vm);
                            }
                          case 0x3:
                            if (op) {
                                return new VcvtmFpSIntD(machInst, vdInt, vm);
                            } else {
                                return new VcvtmFpUIntD(machInst, vdInt, vm);
                            }
                        }
                    }
                }
            } else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){
                // VSEL* floating point conditional select
                ConditionCode cond;
@@ -2079,24 +2175,12 @@ let {{
                  case 0x3: cond = COND_GT; break;
                }
-                if (sz) {
+                if (size == 3) {
                      const IntRegIndex vd =
                          (IntRegIndex)((bits(machInst, 22) << 5) |
                                        (bits(machInst, 15, 12) << 1));
                      const IntRegIndex vm =
                          (IntRegIndex)((bits(machInst, 5) << 5) |
                                        (bits(machInst, 3, 0) << 1));
                      const IntRegIndex vn =
                          (IntRegIndex)((bits(machInst, 7) << 5) |
                                       (bits(machInst, 19, 16) << 1));
                    return new VselD(machInst, vd, vn, vm, cond);
                } else {
                      const IntRegIndex vd =
                          (IntRegIndex)(bits(machInst, 22) |
                                       (bits(machInst, 15, 12) << 1));
                      const IntRegIndex vm =
                          (IntRegIndex)(bits(machInst, 5) |
                                        (bits(machInst, 3, 0) << 1));
                      const IntRegIndex vn =
                          (IntRegIndex)((bits(machInst, 19, 16) << 1) |
                                        bits(machInst, 7));
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
-// Copyright (c) 2010-2013,2016 ARM Limited
+// Copyright (c) 2010-2013,2016,2018-2019 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -993,85 +993,96 @@ let {{
    decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDRIop);
    exec_output += PredOpExecute.subst(vcvtFpSIntDRIop);
-    vcvtFpUIntSCode = vfpEnabledCheckCode + '''
+    round_mode_suffix_to_mode = {
        '': 'VfpRoundZero',
        'a': 'VfpRoundAway',
        'm': 'VfpRoundDown',
        'n': 'VfpRoundNearest',
        'p': 'VfpRoundUpward',
    }
    def buildVcvt(code, className, roundModeSuffix):
        """Generate one FpRegRegOp "vcvt" variant for a rounding suffix.

        code is a format template containing a round_mode placeholder,
        className is a format template that receives the suffix
        positionally, and roundModeSuffix must be a key of
        round_mode_suffix_to_mode.
        """
        global header_output, decoder_output, exec_output, \
            vfpEnabledCheckCode, round_mode_suffix_to_mode
        # Look up the rounding mode for this suffix and splice it into the
        # body, which is always preceded by the VFP-enabled check code.
        mode_name = round_mode_suffix_to_mode[roundModeSuffix]
        body = vfpEnabledCheckCode + code.format(round_mode=mode_name)
        mnemonic = "vcvt{}".format(roundModeSuffix)
        class_name = className.format(roundModeSuffix)
        snippets = {
            "code": body,
            "predicate_test": predicateTest,
            "op_class": "SimdFloatCvtOp",
        }
        iop = InstObjParams(mnemonic, class_name, "FpRegRegOp", snippets, [])
        # Append the declaration, constructor and execute text to the
        # three accumulated output strings.
        header_output += FpRegRegOpDeclare.subst(iop)
        decoder_output += FpRegRegOpConstructor.subst(iop)
        exec_output += PredOpExecute.subst(iop)
    code = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        vfpFlushToZero(fpscr, FpOp1);
        VfpSavedState state = prepFpState(fpscr.rMode);
        fesetround(FeRoundZero);
        __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
-        FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0);
+        FpDest_uw = vfpFpToFixed<float>(
            FpOp1, false, 32, 0, true, {round_mode});
        __asm__ __volatile__("" :: "m" (FpDest_uw));
        finishVfp(fpscr, state, fpscr.fz);
        FpscrExc = fpscr;
    '''
-    vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
+    for round_mode_suffix in round_mode_suffix_to_mode:
-                                     { "code": vcvtFpUIntSCode,
+        buildVcvt(code, "Vcvt{}FpUIntS", round_mode_suffix)
                                       "predicate_test": predicateTest,
                                       "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(vcvtFpUIntSIop);
    decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntSIop);
    exec_output += PredOpExecute.subst(vcvtFpUIntSIop);
-    vcvtFpUIntDCode = vfpEnabledCheckCode + '''
+    code = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
        vfpFlushToZero(fpscr, cOp1);
        VfpSavedState state = prepFpState(fpscr.rMode);
        fesetround(FeRoundZero);
        __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
-        uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0);
+        uint64_t result = vfpFpToFixed<double>(
            cOp1, false, 32, 0, true, {round_mode});
        __asm__ __volatile__("" :: "m" (result));
        finishVfp(fpscr, state, fpscr.fz);
        FpDestP0_uw = result;
        FpscrExc = fpscr;
    '''
-    vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "FpRegRegOp",
+    for round_mode_suffix in round_mode_suffix_to_mode:
-                                     { "code": vcvtFpUIntDCode,
+        buildVcvt(code, "Vcvt{}FpUIntD", round_mode_suffix)
                                       "predicate_test": predicateTest,
                                       "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(vcvtFpUIntDIop);
    decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntDIop);
    exec_output += PredOpExecute.subst(vcvtFpUIntDIop);
-    vcvtFpSIntSCode = vfpEnabledCheckCode + '''
+    code = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        vfpFlushToZero(fpscr, FpOp1);
        VfpSavedState state = prepFpState(fpscr.rMode);
        fesetround(FeRoundZero);
        __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
-        FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0);
+        FpDest_sw = vfpFpToFixed<float>(
            FpOp1, true, 32, 0, true, {round_mode});
        __asm__ __volatile__("" :: "m" (FpDest_sw));
        finishVfp(fpscr, state, fpscr.fz);
        FpscrExc = fpscr;
    '''
-    vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
+    for round_mode_suffix in round_mode_suffix_to_mode:
-                                     { "code": vcvtFpSIntSCode,
+        buildVcvt(code, "Vcvt{}FpSIntS", round_mode_suffix)
                                       "predicate_test": predicateTest,
                                       "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(vcvtFpSIntSIop);
    decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntSIop);
    exec_output += PredOpExecute.subst(vcvtFpSIntSIop);
-    vcvtFpSIntDCode = vfpEnabledCheckCode + '''
+    code = '''
        FPSCR fpscr = (FPSCR) FpscrExc;
        double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
        vfpFlushToZero(fpscr, cOp1);
        VfpSavedState state = prepFpState(fpscr.rMode);
        fesetround(FeRoundZero);
        __asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
-        int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0);
+        int64_t result = vfpFpToFixed<double>(
            cOp1, true, 32, 0, true, {round_mode});
        __asm__ __volatile__("" :: "m" (result));
        finishVfp(fpscr, state, fpscr.fz);
        FpDestP0_uw = result;
        FpscrExc = fpscr;
    '''
-    vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "FpRegRegOp",
+    for round_mode_suffix in round_mode_suffix_to_mode:
-                                     { "code": vcvtFpSIntDCode,
+        buildVcvt(code, "Vcvt{}FpSIntD", round_mode_suffix)
                                       "predicate_test": predicateTest,
                                       "op_class": "SimdFloatCvtOp" }, [])
    header_output += FpRegRegOpDeclare.subst(vcvtFpSIntDIop);
    decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDIop);
    exec_output += PredOpExecute.subst(vcvtFpSIntDIop);
    vcvtFpSFpDCode = vfpEnabledCheckCode + '''
        FPSCR fpscr = (FPSCR) FpscrExc;