arch-arm: Implement ARMv8.1 RDMA
Adding RDMA implementation for ARMv8.1
+ isa/formats/*: Adding decoding of AArch64 and AArch32 instructions
+ isa/insts/neon.isa, isa/insts/neon64.isa: Adding the instruction implementations
Change-Id: I430e8880723f373ffffa50079a87fd4ecc634d86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36015
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -80,7 +80,7 @@ class ArmISA(BaseISA):
|
||||
id_isar2 = Param.UInt32(0x21232141, "Instruction Set Attribute Register 2")
|
||||
id_isar3 = Param.UInt32(0x01112131, "Instruction Set Attribute Register 3")
|
||||
id_isar4 = Param.UInt32(0x10010142, "Instruction Set Attribute Register 4")
|
||||
id_isar5 = Param.UInt32(0x10000000, "Instruction Set Attribute Register 5")
|
||||
id_isar5 = Param.UInt32(0x11000000, "Instruction Set Attribute Register 5")
|
||||
|
||||
fpsid = Param.UInt32(0x410430a0, "Floating-point System ID Register")
|
||||
|
||||
@@ -98,8 +98,8 @@ class ArmISA(BaseISA):
|
||||
id_aa64dfr1_el1 = Param.UInt64(0x0000000000000000,
|
||||
"AArch64 Debug Feature Register 1")
|
||||
|
||||
# !TME | !Atomic | !CRC32 | !SHA2 | !SHA1 | !AES
|
||||
id_aa64isar0_el1 = Param.UInt64(0x0000000000000000,
|
||||
# !TME | !Atomic | !CRC32 | !SHA2 | RDM | !SHA1 | !AES
|
||||
id_aa64isar0_el1 = Param.UInt64(0x0000000010000000,
|
||||
"AArch64 Instruction Set Attribute Register 0")
|
||||
|
||||
# GPI = 0x0 | GPA = 0x1 | API=0x0 | FCMA | JSCVT | APA=0x1
|
||||
|
||||
@@ -2975,6 +2975,8 @@ namespace Aarch64
|
||||
} else {
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
} else if (bits(machInst, 15) && bits(machInst, 10) == 1) {
|
||||
return decodeNeonSc3SameExtra(machInst);
|
||||
} else if (bits(machInst, 23, 22) == 0 &&
|
||||
bits(machInst, 15) == 0) {
|
||||
if (bits(machInst, 10) == 1) {
|
||||
|
||||
@@ -652,7 +652,10 @@ let {{
|
||||
}
|
||||
case 0xb:
|
||||
if (o1) {
|
||||
if (u || q) {
|
||||
if (u) {
|
||||
return decodeNeonSThreeSReg<VqrdmlahD, VqrdmlahQ>(
|
||||
q, size, machInst, vd, vn, vm);
|
||||
} else if (q) {
|
||||
return new Unknown(machInst);
|
||||
} else {
|
||||
return decodeNeonUThreeUSReg<NVpaddD>(
|
||||
@@ -669,7 +672,10 @@ let {{
|
||||
}
|
||||
case 0xc:
|
||||
if (o1) {
|
||||
if (!u) {
|
||||
if (u) {
|
||||
return decodeNeonSThreeSReg<VqrdmlshD, VqrdmlshQ>(
|
||||
q, size, machInst, vd, vn, vm);
|
||||
} else {
|
||||
if (bits(size, 1) == 0) {
|
||||
if (q) {
|
||||
return new NVfmaQFp<float>(machInst, vd, vn, vm);
|
||||
@@ -1504,6 +1510,54 @@ let {{
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
case 0xe:
|
||||
if (u) {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new VqrdmlahsQ<int16_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
case 2:
|
||||
return new VqrdmlahsQ<int32_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new VqrdmlahsD<int16_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
case 2:
|
||||
return new VqrdmlahsD<int32_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
case 0xf:
|
||||
if (u) {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new VqrdmlshsQ<int16_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
case 2:
|
||||
return new VqrdmlshsQ<int32_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
} else {
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new VqrdmlshsD<int16_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
case 2:
|
||||
return new VqrdmlshsD<int32_t>(
|
||||
machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Unknown(machInst);
|
||||
}
|
||||
|
||||
@@ -66,6 +66,8 @@ namespace Aarch64
|
||||
|
||||
// AdvSIMD scalar three same
|
||||
inline StaticInstPtr decodeNeonSc3Same(ExtMachInst machInst);
|
||||
// AdvSIMD scalar three same extra
|
||||
inline StaticInstPtr decodeNeonSc3SameExtra(ExtMachInst machInst);
|
||||
// AdvSIMD scalar three different
|
||||
inline StaticInstPtr decodeNeonSc3Diff(ExtMachInst machInst);
|
||||
// AdvSIMD scalar two-reg misc
|
||||
@@ -516,6 +518,20 @@ namespace Aarch64
|
||||
IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
|
||||
|
||||
switch (opcode) {
|
||||
case 0x10:
|
||||
if (q)
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlahQX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
else
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlahDX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
case 0x11:
|
||||
if (q)
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlshQX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
else
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
case 0x18:
|
||||
case 0x19:
|
||||
case 0x1a:
|
||||
@@ -1531,10 +1547,16 @@ namespace Aarch64
|
||||
return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>(
|
||||
q, size, machInst, vd, vn, vm, index);
|
||||
case 0xd:
|
||||
if (u || (size == 0x0 || size == 0x3))
|
||||
return new Unknown64(machInst);
|
||||
if (u)
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
|
||||
SqrdmlahElemQX>(
|
||||
q, size, machInst, vd, vn, vm, index);
|
||||
else
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX, SqrdmulhElemQX>(
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
|
||||
SqrdmulhElemQX>(
|
||||
q, size, machInst, vd, vn, vm, index);
|
||||
case 0xf:
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
|
||||
q, size, machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
@@ -2105,6 +2127,28 @@ namespace Aarch64
|
||||
}
|
||||
}
|
||||
|
||||
StaticInstPtr
|
||||
decodeNeonSc3SameExtra(ExtMachInst machInst)
|
||||
{
|
||||
uint8_t size = bits(machInst, 23, 22);
|
||||
uint8_t opcode = bits(machInst, 15, 11);
|
||||
|
||||
IntRegIndex vd = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
|
||||
IntRegIndex vn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
|
||||
IntRegIndex vm = (IntRegIndex) (uint8_t) bits(machInst, 20, 16);
|
||||
|
||||
switch (opcode) {
|
||||
case 0x10:
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlahScX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
case 0x11:
|
||||
return decodeNeonSThreeHAndWReg<SqrdmlshScX>(
|
||||
size, machInst, vd, vn, vm);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
}
|
||||
|
||||
StaticInstPtr
|
||||
decodeNeonSc3Diff(ExtMachInst machInst)
|
||||
{
|
||||
@@ -2434,10 +2478,9 @@ namespace Aarch64
|
||||
}
|
||||
IntRegIndex vm_fp = (IntRegIndex) (uint8_t) (vmh << 4 | vm_bf);
|
||||
|
||||
if (u && opcode != 9)
|
||||
return new Unknown64(machInst);
|
||||
uint8_t u_opcode = opcode | u << 4;
|
||||
|
||||
switch (opcode) {
|
||||
switch (u_opcode) {
|
||||
case 0x1:
|
||||
if (size < 2 || sz_L == 0x3)
|
||||
return new Unknown64(machInst);
|
||||
@@ -2465,11 +2508,7 @@ namespace Aarch64
|
||||
case 0x9:
|
||||
if (size < 2 || sz_L == 0x3)
|
||||
return new Unknown64(machInst);
|
||||
if (u)
|
||||
return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
|
||||
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
|
||||
else
|
||||
return decodeNeonUThreeImmScFpReg<FmulElemScX>(
|
||||
return decodeNeonUThreeImmScFpReg<FmulElemScX>(
|
||||
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
|
||||
case 0xb:
|
||||
if (size == 0x0 || size == 0x3)
|
||||
@@ -2484,10 +2523,20 @@ namespace Aarch64
|
||||
return decodeNeonSThreeImmHAndWReg<SqdmulhElemScX>(
|
||||
size, machInst, vd, vn, vm, index);
|
||||
case 0xd:
|
||||
if (size == 0x0 || size == 0x3)
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
|
||||
size, machInst, vd, vn, vm, index);
|
||||
case 0x19:
|
||||
if (size < 2 || sz_L == 0x3)
|
||||
return new Unknown64(machInst);
|
||||
else
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmulhElemScX>(
|
||||
return decodeNeonUThreeImmScFpReg<FmulxElemScX>(
|
||||
size & 0x1, machInst, vd, vn, vm_fp, index_fp);
|
||||
|
||||
case 0x1d:
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmlahElemScX>(
|
||||
size, machInst, vd, vn, vm, index);
|
||||
|
||||
case 0x1f:
|
||||
return decodeNeonSThreeImmHAndWReg<SqrdmlshElemScX>(
|
||||
size, machInst, vd, vn, vm, index);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
|
||||
@@ -1147,7 +1147,7 @@ let {{
|
||||
|
||||
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
|
||||
readDest=False, pairwise=False, byElem=False,
|
||||
standardFpcsr=False, complex=False):
|
||||
standardFpcsr=False, complex=False, extra=''):
|
||||
global header_output, exec_output
|
||||
eWalkCode = simdEnabledCheckCode + '''
|
||||
RegVect srcReg1, destReg;
|
||||
@@ -1203,6 +1203,7 @@ let {{
|
||||
}
|
||||
''' % { "op" : op, "readDest" : readDestCode }
|
||||
else:
|
||||
eWalkCode += extra
|
||||
eWalkCode += '''
|
||||
for (unsigned i = 0; i < eCount; i++) {
|
||||
Element srcElem1 = letoh(srcReg1.elements[i]);
|
||||
@@ -1398,7 +1399,8 @@ let {{
|
||||
threeUnequalRegInst(name, Name, opClass, types, op,
|
||||
True, False, True, readDest)
|
||||
|
||||
def twoEqualRegInst(name, Name, opClass, types, rCount, op, readDest=False):
|
||||
def twoEqualRegInst(name, Name, opClass, types, rCount, op,
|
||||
readDest=False, extra=''):
|
||||
global header_output, exec_output
|
||||
eWalkCode = simdEnabledCheckCode + '''
|
||||
RegVect srcReg1, srcReg2, destReg;
|
||||
@@ -1415,6 +1417,7 @@ let {{
|
||||
readDestCode = ''
|
||||
if readDest:
|
||||
readDestCode = 'destElem = letoh(destReg.elements[i]);'
|
||||
eWalkCode += extra
|
||||
eWalkCode += '''
|
||||
if (imm >= eCount) {
|
||||
return std::make_shared<UndefinedInstruction>(machInst, false,
|
||||
@@ -2783,6 +2786,55 @@ let {{
|
||||
threeEqualRegInst("vqdmulh", "VqdmulhD", "SimdMultOp", smallSignedTypes, 2, vqdmulhCode)
|
||||
threeEqualRegInst("vqdmulh", "VqdmulhQ", "SimdMultOp", smallSignedTypes, 4, vqdmulhCode)
|
||||
|
||||
|
||||
vqrdmCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrQc;
|
||||
int nbits = sizeof(Element)*8;
|
||||
|
||||
auto val_max = std::numeric_limits<Element>::max();
|
||||
auto val_min = std::numeric_limits<Element>::min();
|
||||
BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
|
||||
((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
|
||||
((BigElement)1 << (nbits - 1));
|
||||
unsat_value >>= nbits;
|
||||
|
||||
if (unsat_value > val_max) {
|
||||
fpscr.qc = 1;
|
||||
destElem = val_max;
|
||||
} else if (unsat_value < val_min) {
|
||||
fpscr.qc = 1;
|
||||
destElem = val_min;
|
||||
} else {
|
||||
destElem = unsat_value;
|
||||
}
|
||||
FpscrQc = fpscr;
|
||||
'''
|
||||
code_add = "+"
|
||||
vqrdmlahCode = vqrdmCode % {'code': code_add}
|
||||
rdm_check = '''
|
||||
int sz = bits(machInst, 21, 20);
|
||||
RegVal isar5 = xc->tcBase()->readMiscReg(MISCREG_ID_ISAR5);
|
||||
if (!(bits(isar5, 27, 24) == 0x1) || sz == 3 || sz == 0)
|
||||
return std::make_shared<UndefinedInstruction>(machInst, true);
|
||||
typedef __int128_t BigElement;
|
||||
'''
|
||||
threeEqualRegInst("vqrdmlah", "VqrdmlahD",
|
||||
"SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
threeEqualRegInst("vqrdmlah", "VqrdmlahQ",
|
||||
"SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
|
||||
code_sub = "-"
|
||||
vqrdmlshCode = vqrdmCode % {'code': code_sub}
|
||||
threeEqualRegInst("vqrdmlsh", "VqrdmlshD",
|
||||
"SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
threeEqualRegInst("vqrdmlsh", "VqrdmlshQ",
|
||||
"SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
|
||||
|
||||
vqrdmulhCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrQc;
|
||||
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
|
||||
@@ -3033,6 +3085,18 @@ let {{
|
||||
"SimdMultOp", smallSignedTypes, 2, vqrdmulhCode)
|
||||
twoEqualRegInst("vqrdmulh", "VqrdmulhsQ",
|
||||
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
|
||||
twoEqualRegInst("vqrdmlah", "VqrdmlahsD",
|
||||
"SimdMultOp", smallSignedTypes, 2, vqrdmlahCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
twoEqualRegInst("vqrdmlah", "VqrdmlahsQ",
|
||||
"SimdMultOp", smallSignedTypes, 4, vqrdmlahCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
twoEqualRegInst("vqrdmlsh", "VqrdmlshsD",
|
||||
"SimdMultOp", smallSignedTypes, 2, vqrdmlshCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
twoEqualRegInst("vqrdmlsh", "VqrdmlshsQ",
|
||||
"SimdMultOp", smallSignedTypes, 4, vqrdmlshCode, readDest=True,
|
||||
extra=rdm_check)
|
||||
|
||||
vshrCode = '''
|
||||
if (imm >= sizeof(srcElem1) * 8) {
|
||||
|
||||
@@ -52,7 +52,8 @@ let {{
|
||||
|
||||
def threeEqualRegInstX(name, Name, opClass, types, rCount, op,
|
||||
readDest=False, pairwise=False, scalar=False,
|
||||
byElem=False, decoder='Generic', complex=False):
|
||||
byElem=False, decoder='Generic', complex=False,
|
||||
extra=''):
|
||||
assert (not pairwise) or ((not byElem) and (not scalar))
|
||||
global header_output, exec_output, decoders
|
||||
eWalkCode = simd64EnabledCheckCode + '''
|
||||
@@ -110,6 +111,7 @@ let {{
|
||||
continue;
|
||||
}
|
||||
'''
|
||||
eWalkCode += extra
|
||||
eWalkCode += '''
|
||||
for (unsigned i = 0; i < eCount; i++) {
|
||||
%(scalarCheck)s
|
||||
@@ -2336,7 +2338,81 @@ let {{
|
||||
sqnegCode)
|
||||
twoEqualRegInstX("sqneg", "SqnegScX", "SimdAluOp", signedTypes, 4,
|
||||
sqnegCode, scalar=True)
|
||||
# SQRDMULH (by element)
|
||||
sqrdmCode = '''
|
||||
|
||||
FPSCR fpscr = (FPSCR) FpscrQc;
|
||||
int nbits = sizeof(Element)*8;
|
||||
|
||||
auto val_max = std::numeric_limits<Element>::max();
|
||||
auto val_min = std::numeric_limits<Element>::min();
|
||||
BigElement unsat_value = ((BigElement)destElem << nbits) %(code)s
|
||||
((BigElement)srcElem1 * (BigElement)srcElem2 * 2) +
|
||||
((BigElement)1 << (nbits - 1));
|
||||
unsat_value >>= nbits;
|
||||
|
||||
if (unsat_value > val_max) {
|
||||
fpscr.qc = 1;
|
||||
destElem = val_max;
|
||||
} else if (unsat_value < val_min) {
|
||||
fpscr.qc = 1;
|
||||
destElem = val_min;
|
||||
} else {
|
||||
destElem = unsat_value;
|
||||
}
|
||||
FpscrQc = fpscr;
|
||||
'''
|
||||
code_add = "+"
|
||||
sqrdmlahCode = sqrdmCode % {'code': code_add}
|
||||
rdm_check = '''
|
||||
int sz = bits(machInst, 23, 22);
|
||||
AA64ISAR0 isar0 = xc->tcBase()->readMiscReg( MISCREG_ID_AA64ISAR0_EL1);
|
||||
if (!isar0.rdm || sz == 3 || sz == 0)
|
||||
return std::make_shared<UndefinedInstruction>(machInst, true);
|
||||
typedef __int128_t BigElement;
|
||||
'''
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemDX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 2, sqrdmlahCode, byElem=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemQX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahElemScX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlahCode, byElem=True,
|
||||
readDest=True, scalar=True, extra=rdm_check)
|
||||
# SQRDMLAH (vector)
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahDX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 2, sqrdmlahCode,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahQX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlahCode,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlah", "SqrdmlahScX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlahCode, scalar=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
# SQRDMLSH (by element)
|
||||
code_sub = "-"
|
||||
sqrdmlshCode = sqrdmCode % {'code': code_sub}
|
||||
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemDX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 2, sqrdmlshCode, byElem=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemQX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshElemScX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlshCode, byElem=True,
|
||||
readDest=True, scalar=True, extra=rdm_check)
|
||||
# SQRDMLSH (vector)
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshDX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 2, sqrdmlshCode,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshQX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlshCode,
|
||||
readDest=True, extra=rdm_check)
|
||||
threeEqualRegInstX("sqrdmlsh", "SqrdmlshScX", "SimdMultOp",
|
||||
("int16_t", "int32_t"), 4, sqrdmlshCode, scalar=True,
|
||||
readDest=True, extra=rdm_check)
|
||||
# SQRDMULH (by element)
|
||||
sqrdmulhCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrQc;
|
||||
destElem = (2 * (int64_t)srcElem1 * (int64_t)srcElem2 +
|
||||
|
||||
Reference in New Issue
Block a user