arch-arm: Implementation ARMv8.1 RDMA

Adding RDMA implementation for ARMv8.1 + isa/formats/*: Adding decoding of AArch64 and AArch32 instructions + isa/insts/neon.isa, isa/insts/neon64.isa: Adding function instructions Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015 Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com> Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com> Tested-by: kokoro <noreply+kokoro@google.com>
2020-09-14 18:08:38 +02:00
parent caf6a507cb
commit b0dbc09b3f
6 changed files with 268 additions and 23 deletions
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -80,7 +80,7 @@ class ArmISA(BaseISA):
    id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute Register 2")
    id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute Register 3")
    id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4")
-    id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute Register 5")
+    id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute Register 5")

    fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register")

@@ -98,8 +98,8 @@ class ArmISA(BaseISA):
    id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000,
        "AArch64 Debug Feature Register 1")

-    # !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
-    id_aa64isar0_el1 = Param.UInt64(0x0000000000000000,
+    # RDM | !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
+    id_aa64isar0_el1 = Param.UInt64(0x0000000010000000,
        "AArch64 Instruction Set Attribute Register 0")

    # GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -2975,6 +2975,8 @@ namespace Aarch64
            } else {
                return new Unknown64(machInst);
            }
+        } else if (bits(machInst, 15) && bits(machInst, 10) == 1) {
+            return decodeNeonSc3SameExtra(machInst);
        } else if (bits(machInst, 23, 22) == 0 &&
                   bits(machInst, 15) == 0) {
            if (bits(machInst, 10) == 1) {
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -652,7 +652,10 @@ let {{
            }
          case 0xb:
            if (o1) {
-                if (u || q) {
+                if (u) {
+                    return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
+                            q, size, machInst, vd, vn, vm);
+                } else if (q) {
                    return new Unknown(machInst);
                } else {
                    return decodeNeonUThreeUSReg<NVpaddD>(
@@ -669,7 +672,10 @@ let {{
            }
          case 0xc:
            if (o1) {
-                if (!u) {
+                if (u) {
+                    return decodeNeonSThreeSReg<VqrdmlshD, VqrdmlshQ>(
+                            q, size, machInst, vd, vn, vm);
+                } else {
                    if (bits(size, 1) == 0) {
                        if (q) {
                            return new NVfmaQFp<float>(machInst, vd, vn, vm);
@@ -1504,6 +1510,54 @@ let {{
                    return new Unknown(machInst);
                }
            }
+          case 0xe:
+            if (u) {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsQ<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsQ<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            } else {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlahsD<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlahsD<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            }
+          case 0xf:
+            if (u) {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlshsQ<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlshsQ<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            } else {
+                switch (size) {
+                  case 1:
+                    return new VqrdmlshsD<int16_t>(
+                            machInst, vd, vn, vm, index);
+                  case 2:
+                    return new VqrdmlshsD<int32_t>(
+                            machInst, vd, vn, vm, index);
+                  default:
+                    return new Unknown(machInst);
+                }
+            }
        }
        return new Unknown(machInst);
    }
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -66,6 +66,8 @@ namespace Aarch64

    // AdvSIMD scalar three same
    inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst);
+    // AdvSIMD scalar three same extra
+    inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst);
    // AdvSIMD scalar three different
    inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst);
    // AdvSIMD scalar two-reg misc
@@ -516,6 +518,20 @@ namespace Aarch64
        IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);

        switch (opcode) {
+          case 0x10:
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
+                    size, machInst, vd, vn, vm);
+          case 0x11:
+            if (q)
+                return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
+                    size, machInst, vd, vn, vm);
+            else
+                return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
+                    size, machInst, vd, vn, vm);
          case 0x18:
          case 0x19:
          case 0x1a:
@@ -1531,10 +1547,16 @@ namespace Aarch64
                return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>(
                    q, size, machInst, vd, vn, vm, index);
          case 0xd:
-            if (u || (size == 0x0 || size == 0x3))
-                return new Unknown64(machInst);
+            if (u)
+                return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
+                                                   SqrdmlahElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
            else
-                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>(
+                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
+                                                   SqrdmulhElemQX>(
+                    q, size, machInst, vd, vn, vm, index);
+          case 0xf:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
                    q, size, machInst, vd, vn, vm, index);
          default:
            return new Unknown64(machInst);
@@ -2105,6 +2127,28 @@ namespace Aarch64
        }
    }

+    StaticInstPtr
+    decodeNeonSc3SameExtra(ExtMachInst machInst)
+    {
+        uint8_t size = bits(machInst, 23, 22);
+        uint8_t opcode = bits(machInst, 15, 11);
+
+        IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
+        IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
+        IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
+
+        switch (opcode) {
+          case 0x10:
+            return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
+                size, machInst, vd, vn, vm);
+          case 0x11:
+            return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
+                size, machInst, vd, vn, vm);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
    StaticInstPtr
    decodeNeonSc3Diff(ExtMachInst machInst)
    {
@@ -2434,10 +2478,9 @@ namespace Aarch64
        }
        IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);

-        if (u && opcode != 9)
-            return new Unknown64(machInst);
+        uint8_t u_opcode = opcode | u << 4;

-        switch (opcode) {
+        switch (u_opcode) {
          case 0x1:
            if (size < 2 || sz_L == 0x3)
                return new Unknown64(machInst);
@@ -2465,11 +2508,7 @@ namespace Aarch64
          case 0x9:
            if (size < 2 || sz_L == 0x3)
                return new Unknown64(machInst);
-            if (u)
-                return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
-                    size & 0x1, machInst, vd, vn, vm_fp, index_fp);
-            else
-                return decodeNeonUThreeImmScFpReg<FmulElemScX>(
+            return decodeNeonUThreeImmScFpReg<FmulElemScX>(
                    size & 0x1, machInst, vd, vn, vm_fp, index_fp);
          case 0xb:
            if (size == 0x0 || size == 0x3)
@@ -2484,10 +2523,20 @@ namespace Aarch64
                return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
                    size, machInst, vd, vn, vm, index);
          case 0xd:
-            if (size == 0x0 || size == 0x3)
+            return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+                    size, machInst, vd, vn, vm, index);
+          case 0x19:
+            if (size < 2 || sz_L == 0x3)
                return new Unknown64(machInst);
-            else
-                return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
+            return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
+                    size & 0x1, machInst, vd, vn, vm_fp, index_fp);
+
+          case 0x1d:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
+                    size, machInst, vd, vn, vm, index);
+
+          case 0x1f:
+            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
                    size, machInst, vd, vn, vm, index);
          default:
            return new Unknown64(machInst);
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1147,7 +1147,7 @@ let {{

    def threeEqualRegInst(name, Name, opClass, types, rCount, op,
                          readDest=False, pairwise=False, byElem=False,
-                          standardFpcsr=False, complex=False):
+                          standardFpcsr=False, complex=False, extra=''):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
                    RegVect srcReg1, destReg;
@@ -1203,6 +1203,7 @@ let {{
            }
            ''' % { "op" : op, "readDest" : readDestCode }
        else:
+            eWalkCode += extra
            eWalkCode += '''
            for (unsigned i = 0; i < eCount; i++) {
                Element srcElem1 = letoh(srcReg1.elements[i]);
@@ -1398,7 +1399,8 @@ let {{
        threeUnequalRegInst(name, Name, opClass, types, op,
                            True, False, True, readDest)

-    def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
+    def twoEqualRegInst(name, Name, opClass, types, rCount, op,
+                        readDest=False, extra=''):
        global header_output, exec_output
        eWalkCode = simdEnabledCheckCode + '''
        RegVect srcReg1, srcReg2, destReg;
@@ -1415,6 +1417,7 @@ let {{
        readDestCode = ''
        if readDest:
            readDestCode = 'destElem = letoh(destReg.elements[i]);'
+        eWalkCode += extra
        eWalkCode += '''
        if (imm >= eCount) {
            return std::make_shared<UndefinedInstruction>(machInst, false,
@@ -2783,6 +2786,55 @@ let {{
    threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
    threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)

+
+    vqrdmCode = '''
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          int nbits = sizeof(Element)*8;
+
+          auto val_max = std::numeric_limits<Element>::max();
+          auto val_min = std::numeric_limits<Element>::min();
+          BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+                ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+                ((BigElement)1 << (nbits - 1));
+          unsat_value >>= nbits;
+
+          if (unsat_value > val_max) {
+              fpscr.qc = 1;
+              destElem = val_max;
+          } else if (unsat_value < val_min) {
+              fpscr.qc = 1;
+              destElem = val_min;
+          } else {
+              destElem = unsat_value;
+          }
+          FpscrQc = fpscr;
+    '''
+    code_add = "+"
+    vqrdmlahCode = vqrdmCode % {'code': code_add}
+    rdm_check = '''
+      int sz = bits(machInst, 21, 20);
+      RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5);
+      if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0)
+          return std::make_shared<UndefinedInstruction>(machInst, true);
+      typedef __int128_t BigElement;
+    '''
+    threeEqualRegInst("vqrdmlah", "VqrdmlahD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+
+    code_sub = "-"
+    vqrdmlshCode = vqrdmCode % {'code': code_sub}
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+    threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+
+
    vqrdmulhCode = '''
        FPSCR fpscr = (FPSCR) FpscrQc;
        destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3085,18 @@ let {{
            "SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
    twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
            "SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlsh", "VqrdmlshsD",
+            "SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
+            extra=rdm_check)
+    twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ",
+            "SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
+            extra=rdm_check)

    vshrCode = '''
        if (imm >= sizeof(srcElem1) * 8) {
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -52,7 +52,8 @@ let {{

    def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
                           readDest=False, pairwise=False, scalar=False,
-                           byElem=False, decoder='Generic', complex=False):
+                           byElem=False, decoder='Generic', complex=False,
+                           extra=''):
        assert (not pairwise) or ((not byElem) and (not scalar))
        global header_output, exec_output, decoders
        eWalkCode = simd64EnabledCheckCode + '''
@@ -110,6 +111,7 @@ let {{
                continue;
            }
            '''
+            eWalkCode += extra
            eWalkCode += '''
        for (unsigned i = 0; i < eCount; i++) {
            %(scalarCheck)s
@@ -2336,7 +2338,81 @@ let {{
                     sqnegCode)
    twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
                     sqnegCode, scalar=True)
-    # SQRDMULH (by element)
+    sqrdmCode = '''
+
+          FPSCR fpscr = (FPSCR) FpscrQc;
+          int nbits = sizeof(Element)*8;
+
+          auto val_max = std::numeric_limits<Element>::max();
+          auto val_min = std::numeric_limits<Element>::min();
+          BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
+                ((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
+                ((BigElement)1 << (nbits - 1));
+          unsat_value >>= nbits;
+
+          if (unsat_value > val_max) {
+              fpscr.qc = 1;
+              destElem = val_max;
+          } else if (unsat_value < val_min) {
+              fpscr.qc = 1;
+              destElem = val_min;
+          } else {
+              destElem = unsat_value;
+          }
+          FpscrQc = fpscr;
+    '''
+    code_add = "+"
+    sqrdmlahCode = sqrdmCode % {'code': code_add}
+    rdm_check = '''
+      int sz = bits(machInst, 23, 22);
+      AA64ISAR0 isar0 = xc->tcBase()->readMiscReg( MISCREG_ID_AA64ISAR0_EL1);
+      if (!isar0.rdm || sz == 3 || sz == 0)
+          return std::make_shared<UndefinedInstruction>(machInst, true);
+      typedef __int128_t BigElement;
+    '''
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
+                       readDest=True, scalar=True, extra=rdm_check)
+    # SQRDMLAH (vector)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlahCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlahCode, scalar=True,
+                       readDest=True, extra=rdm_check)
+    # SQRDMLSH (by element)
+    code_sub = "-"
+    sqrdmlshCode = sqrdmCode % {'code': code_sub}
+
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
+                       readDest=True, scalar=True, extra=rdm_check)
+    # SQRDMLSH (vector)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 2, sqrdmlshCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode,
+                       readDest=True, extra=rdm_check)
+    threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
+                       ("int16_t", "int32_t"), 4, sqrdmlshCode, scalar=True,
+                       readDest=True, extra=rdm_check)
+    # SQRDMULH (by element)
    sqrdmulhCode = '''
            FPSCR fpscr = (FPSCR) FpscrQc;
            destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +