arch-arm: Implementation ARMv8.1 RDMA

Adding RDMA implementation for ARMv8.1
    + isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
    + isa/insts/neon.isa, neon64.isa: Adding instruction definitions

Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Jordi Vaquero
2020-09-14 18:08:38 +02:00
parent caf6a507cb
commit b0dbc09b3f
6 changed files with 268 additions and 23 deletions

View File

@@ -80,7 +80,7 @@ class ArmISA(BaseISA):
id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute Register 2")
id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute Register 3")
id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4")
id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute Register 5")
id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute Register 5")
fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register")
@@ -98,8 +98,8 @@ class ArmISA(BaseISA):
id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000,
"AArch64 Debug Feature Register 1")
# !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
id_aa64isar0_el1 = Param.UInt64(0x0000000000000000,
# !TME | !Atomic | !CRC32 | !SHA2 | RDM | !SHA1 | !AES
id_aa64isar0_el1 = Param.UInt64(0x0000000010000000,
"AArch64 Instruction Set Attribute Register 0")
# GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1

View File

@@ -2975,6 +2975,8 @@ namespace Aarch64
} else {
return new Unknown64(machInst);
}
} else if (bits(machInst, 15) && bits(machInst, 10) == 1) {
return decodeNeonSc3SameExtra(machInst);
} else if (bits(machInst, 23, 22) == 0 &&
bits(machInst, 15) == 0) {
if (bits(machInst, 10) == 1) {

View File

@@ -652,7 +652,10 @@ let {{
}
case 0xb:
if (o1) {
if (u || q) {
if (u) {
return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
q, size, machInst, vd, vn, vm);
} else if (q) {
return new Unknown(machInst);
} else {
return decodeNeonUThreeUSReg<NVpaddD>(
@@ -669,7 +672,10 @@ let {{
}
case 0xc:
if (o1) {
if (!u) {
if (u) {
return decodeNeonSThreeSReg<VqrdmlshD, VqrdmlshQ>(
q, size, machInst, vd, vn, vm);
} else {
if (bits(size, 1) == 0) {
if (q) {
return new NVfmaQFp<float>(machInst, vd, vn, vm);
@@ -1504,6 +1510,54 @@ let {{
return new Unknown(machInst);
}
}
case 0xe:
if (u) {
switch (size) {
case 1:
return new VqrdmlahsQ<int16_t>(
machInst, vd, vn, vm, index);
case 2:
return new VqrdmlahsQ<int32_t>(
machInst, vd, vn, vm, index);
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 1:
return new VqrdmlahsD<int16_t>(
machInst, vd, vn, vm, index);
case 2:
return new VqrdmlahsD<int32_t>(
machInst, vd, vn, vm, index);
default:
return new Unknown(machInst);
}
}
case 0xf:
if (u) {
switch (size) {
case 1:
return new VqrdmlshsQ<int16_t>(
machInst, vd, vn, vm, index);
case 2:
return new VqrdmlshsQ<int32_t>(
machInst, vd, vn, vm, index);
default:
return new Unknown(machInst);
}
} else {
switch (size) {
case 1:
return new VqrdmlshsD<int16_t>(
machInst, vd, vn, vm, index);
case 2:
return new VqrdmlshsD<int32_t>(
machInst, vd, vn, vm, index);
default:
return new Unknown(machInst);
}
}
}
return new Unknown(machInst);
}

View File

@@ -66,6 +66,8 @@ namespace Aarch64
// AdvSIMD scalar three same
inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst);
// AdvSIMD scalar three same extra
inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst);
// AdvSIMD scalar three different
inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst);
// AdvSIMD scalar two-reg misc
@@ -516,6 +518,20 @@ namespace Aarch64
IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
switch (opcode) {
case 0x10:
if (q)
return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
size, machInst, vd, vn, vm);
else
return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
size, machInst, vd, vn, vm);
case 0x11:
if (q)
return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
size, machInst, vd, vn, vm);
else
return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
size, machInst, vd, vn, vm);
case 0x18:
case 0x19:
case 0x1a:
@@ -1531,10 +1547,16 @@ namespace Aarch64
return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>(
q, size, machInst, vd, vn, vm, index);
case 0xd:
if (u || (size == 0x0 || size == 0x3))
return new Unknown64(machInst);
if (u)
return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
SqrdmlahElemQX>(
q, size, machInst, vd, vn, vm, index);
else
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>(
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
SqrdmulhElemQX>(
q, size, machInst, vd, vn, vm, index);
case 0xf:
return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
q, size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);
@@ -2105,6 +2127,28 @@ namespace Aarch64
}
}
// Decoder for the "AdvSIMD scalar three same extra" group.
// Reads the element size (bits 23:22), the opcode (bits 15:11) and the three
// register indices from the instruction word, then builds the scalar
// SQRDMLAH (opcode 0x10) or SQRDMLSH (opcode 0x11) instruction object.
// Every other opcode value yields Unknown64.
StaticInstPtr
decodeNeonSc3SameExtra(ExtMachInst machInst)
{
    const uint8_t elemSize = bits(machInst, 23, 22);
    const uint8_t op = bits(machInst, 15, 11);

    // Register fields: destination in bits 4:0, first source in bits 9:5,
    // second source in bits 20:16.
    const IntRegIndex destReg =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 4, 0)));
    const IntRegIndex srcReg1 =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 9, 5)));
    const IntRegIndex srcReg2 =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 20, 16)));

    if (op == 0x10) {
        return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
            elemSize, machInst, destReg, srcReg1, srcReg2);
    }
    if (op == 0x11) {
        return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
            elemSize, machInst, destReg, srcReg1, srcReg2);
    }
    return new Unknown64(machInst);
}
StaticInstPtr
decodeNeonSc3Diff(ExtMachInst machInst)
{
@@ -2434,10 +2478,9 @@ namespace Aarch64
}
IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
if (u && opcode != 9)
return new Unknown64(machInst);
uint8_t u_opcode = opcode | u << 4;
switch (opcode) {
switch (u_opcode) {
case 0x1:
if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
@@ -2465,11 +2508,7 @@ namespace Aarch64
case 0x9:
if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
if (u)
return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
else
return decodeNeonUThreeImmScFpReg<FmulElemScX>(
return decodeNeonUThreeImmScFpReg<FmulElemScX>(
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
case 0xb:
if (size == 0x0 || size == 0x3)
@@ -2484,10 +2523,20 @@ namespace Aarch64
return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
size, machInst, vd, vn, vm, index);
case 0xd:
if (size == 0x0 || size == 0x3)
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
size, machInst, vd, vn, vm, index);
case 0x19:
if (size < 2 || sz_L == 0x3)
return new Unknown64(machInst);
else
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
case 0x1d:
return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
size, machInst, vd, vn, vm, index);
case 0x1f:
return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
size, machInst, vd, vn, vm, index);
default:
return new Unknown64(machInst);

View File

@@ -1147,7 +1147,7 @@ let {{
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
readDest=False, pairwise=False, byElem=False,
standardFpcsr=False, complex=False):
standardFpcsr=False, complex=False, extra=''):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, destReg;
@@ -1203,6 +1203,7 @@ let {{
}
''' % { "op" : op, "readDest" : readDestCode }
else:
eWalkCode += extra
eWalkCode += '''
for (unsigned i = 0; i < eCount; i++) {
Element srcElem1 = letoh(srcReg1.elements[i]);
@@ -1398,7 +1399,8 @@ let {{
threeUnequalRegInst(name, Name, opClass, types, op,
True, False, True, readDest)
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
def twoEqualRegInst(name, Name, opClass, types, rCount, op,
readDest=False, extra=''):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, srcReg2, destReg;
@@ -1415,6 +1417,7 @@ let {{
readDestCode = ''
if readDest:
readDestCode = 'destElem = letoh(destReg.elements[i]);'
eWalkCode += extra
eWalkCode += '''
if (imm >= eCount) {
return std::make_shared<UndefinedInstruction>(machInst, false,
@@ -2783,6 +2786,55 @@ let {{
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
# Shared C++ template for the per-element operation of VQRDMLAH / VQRDMLSH.
# The %(code)s placeholder is substituted below with "+" or "-". The template
# widens the operands to BigElement, combines (destElem << nbits) with
# 2 * srcElem1 * srcElem2 using that operator, adds 1 << (nbits - 1), shifts
# the sum right by nbits and clamps it to the limits of Element. FPSCR.QC is
# set whenever clamping happens. BigElement is not defined here; it is
# supplied by the rdm_check prologue below.
vqrdmCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
int nbits = sizeof(Element)*8;
auto val_max = std::numeric_limits<Element>::max();
auto val_min = std::numeric_limits<Element>::min();
BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
((BigElement)1 << (nbits - 1));
unsat_value >>= nbits;
if (unsat_value > val_max) {
fpscr.qc = 1;
destElem = val_max;
} else if (unsat_value < val_min) {
fpscr.qc = 1;
destElem = val_min;
} else {
destElem = unsat_value;
}
FpscrQc = fpscr;
'''
# Accumulating variant: the product term is added to the shifted destination.
code_add = "+"
vqrdmlahCode = vqrdmCode % {'code': code_add}
# Prologue handed to the generators through their `extra` argument. It returns
# an UndefinedInstruction fault unless ID_ISAR5 bits 27:24 read as 0x1 and the
# size field (machInst bits 21:20) is 1 or 2. It also defines BigElement as
# __int128_t for the template above.
rdm_check = '''
int sz = bits(machInst, 21, 20);
RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5);
if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0)
return std::make_shared<UndefinedInstruction>(machInst, true);
typedef __int128_t BigElement;
'''
# VQRDMLAH, three-register form: D (rCount=2) and Q (rCount=4) classes.
# readDest=True because the template uses destElem as an input.
threeEqualRegInst("vqrdmlah", "VqrdmlahD",
"SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
extra=rdm_check)
threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
extra=rdm_check)
# Subtracting variant: the product term is subtracted from the shifted
# destination.
code_sub = "-"
vqrdmlshCode = vqrdmCode % {'code': code_sub}
# VQRDMLSH, three-register form: D (rCount=2) and Q (rCount=4) classes.
threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
"SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
extra=rdm_check)
threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
extra=rdm_check)
vqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
@@ -3033,6 +3085,18 @@ let {{
"SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
# VQRDMLAH generated through twoEqualRegInst: D (rCount=2) and Q (rCount=4)
# classes. readDest=True because the operation reads destElem as an input;
# rdm_check is prepended through `extra`.
twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
"SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
extra=rdm_check)
twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
extra=rdm_check)
# VQRDMLSH generated through twoEqualRegInst: D (rCount=2) and Q (rCount=4)
# classes, with the same readDest / extra settings as above.
twoEqualRegInst("vqrdmlsh", "VqrdmlshsD",
"SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
extra=rdm_check)
twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
extra=rdm_check)
vshrCode = '''
if (imm >= sizeof(srcElem1) * 8) {

View File

@@ -52,7 +52,8 @@ let {{
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
readDest=False, pairwise=False, scalar=False,
byElem=False, decoder='Generic', complex=False):
byElem=False, decoder='Generic', complex=False,
extra=''):
assert (not pairwise) or ((not byElem) and (not scalar))
global header_output, exec_output, decoders
eWalkCode = simd64EnabledCheckCode + '''
@@ -110,6 +111,7 @@ let {{
continue;
}
'''
eWalkCode += extra
eWalkCode += '''
for (unsigned i = 0; i < eCount; i++) {
%(scalarCheck)s
@@ -2336,7 +2338,81 @@ let {{
sqnegCode)
twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
sqnegCode, scalar=True)
# SQRDMLAH / SQRDMLSH: shared C++ template for the per-element operation.
# The %(code)s placeholder is substituted below with "+" or "-". The template
# widens the operands to BigElement, combines (destElem << nbits) with
# 2 * srcElem1 * srcElem2 using that operator, adds 1 << (nbits - 1), shifts
# the sum right by nbits and clamps it to the limits of Element. FPSCR.QC is
# set whenever clamping happens. BigElement is not defined here; it is
# supplied by the rdm_check prologue below.
sqrdmCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
int nbits = sizeof(Element)*8;
auto val_max = std::numeric_limits<Element>::max();
auto val_min = std::numeric_limits<Element>::min();
BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
((BigElement)1 << (nbits - 1));
unsat_value >>= nbits;
if (unsat_value > val_max) {
fpscr.qc = 1;
destElem = val_max;
} else if (unsat_value < val_min) {
fpscr.qc = 1;
destElem = val_min;
} else {
destElem = unsat_value;
}
FpscrQc = fpscr;
'''
# Accumulating variant: the product term is added to the shifted destination.
code_add = "+"
sqrdmlahCode = sqrdmCode % {'code': code_add}
# Prologue handed to threeEqualRegInstX through its `extra` argument. It
# returns an UndefinedInstruction fault unless the rdm field of
# ID_AA64ISAR0_EL1 is non-zero and the size field (machInst bits 23:22) is
# 1 or 2. It also defines BigElement as __int128_t for the template above.
rdm_check = '''
int sz = bits(machInst, 23, 22);
AA64ISAR0 isar0 = xc->tcBase()->readMiscReg( MISCREG_ID_AA64ISAR0_EL1);
if (!isar0.rdm || sz == 3 || sz == 0)
return std::make_shared<UndefinedInstruction>(machInst, true);
typedef __int128_t BigElement;
'''
# SQRDMLAH (by element)
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
("int16_t", "int32_t"), 2, sqrdmlahCode, byElem=True,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
readDest=True, scalar=True, extra=rdm_check)
# SQRDMLAH (vector)
threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
("int16_t", "int32_t"), 2, sqrdmlahCode,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlahCode,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlahCode, scalar=True,
readDest=True, extra=rdm_check)
# SQRDMLSH (by element)
# Subtracting variant: the product term is subtracted from the shifted
# destination.
code_sub = "-"
sqrdmlshCode = sqrdmCode % {'code': code_sub}
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
("int16_t", "int32_t"), 2, sqrdmlshCode, byElem=True,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
readDest=True, scalar=True, extra=rdm_check)
# SQRDMLSH (vector)
threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
("int16_t", "int32_t"), 2, sqrdmlshCode,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlshCode,
readDest=True, extra=rdm_check)
threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
("int16_t", "int32_t"), 4, sqrdmlshCode, scalar=True,
readDest=True, extra=rdm_check)
# SQRDMULH (by element)
sqrdmulhCode = '''
FPSCR fpscr = (FPSCR) FpscrQc;
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +