arch-arm: Partial SVE2 Implementation (#657)

Instructions added:

BGRP, RAX1, EOR3, BCAX,
XAR & TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T

Move from gerrit [1]

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/70277

Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
This commit is contained in:
Giacomo Travaglini
2023-12-13 10:27:19 +00:00
committed by GitHub
3 changed files with 464 additions and 22 deletions

View File

@@ -568,6 +568,193 @@ namespace Aarch64
return new Unknown64(machInst);
} // decodeSveIntArithUnpred
StaticInstPtr
decodeSveIntMulUnpred(ExtMachInst machInst)
{
    // SVE2 integer multiply vectors (unpredicated).
    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t opc = bits(machInst, 11, 10);
    const uint8_t size = bits(machInst, 23, 22);

    // Only PMUL (opc == 0x1, byte elements) is implemented here.
    // MUL (opc == 0x0), SMULH (0x2) and UMULH (0x3), as well as PMUL
    // with any other size field, decode to Unknown64 for now.
    if (opc == 0x1 && size == 0x0) {
        return new SvePmul<uint8_t>(machInst, zd, zn, zm);
    }
    return new Unknown64(machInst);
} // decodeSveIntMulUnpred
StaticInstPtr
decodeSveIntTerUnpred(ExtMachInst machInst)
{
    // SVE2 bitwise ternary operations (destructive, unpredicated).
    RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t opc = bits(machInst, 23, 22) << 1 | bits(machInst, 10);

    // Only EOR3 (opc == 0x0) and BCAX (opc == 0x2) are implemented;
    // BSL (0x1), BSL1N (0x3), BSL2N (0x5) and NBSL (0x7) are not
    // decoded yet.
    if (opc == 0x0) {
        return new SveEor3<uint64_t>(machInst, zdn, zm, zk);
    }
    if (opc == 0x2) {
        return new SveBcax<uint64_t>(machInst, zdn, zm, zk);
    }
    return new Unknown64(machInst);
} // decodeSveIntTerUnpred
StaticInstPtr
decodeSve2IntMulLong(ExtMachInst machInst)
{
    // SVE2 integer multiply long (vectors, unpredicated).
    // Bit 10 selects the bottom/top variant, bits 12:11 the operation.
    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t opc_u_t = bits(machInst, 12, 10);
    const uint8_t size = bits(machInst, 23, 22);

    switch (opc_u_t) {
      case 0x2: // PMULLB
        return decodeSveBinUnpredS2<SvePmullb>(size, machInst, zd, zn, zm);
      case 0x3: // PMULLT
        return decodeSveBinUnpredS2<SvePmullt>(size, machInst, zd, zn, zm);
      case 0x4: // SMULLB
        return decodeSveBinUnpredSigned<SveSmullb>(size, machInst, zd, zn, zm);
      case 0x5: // SMULLT
        return decodeSveBinUnpredSigned<SveSmullt>(size, machInst, zd, zn, zm);
      case 0x6: // UMULLB
        return decodeSveBinUnpredUnsigned<SveUmullb>(size, machInst, zd, zn, zm);
      case 0x7: // UMULLT
        return decodeSveBinUnpredUnsigned<SveUmullt>(size, machInst, zd, zn, zm);
      default:  // 0x0/0x1: SQDMULLB/SQDMULLT not implemented yet.
        return new Unknown64(machInst);
    }
} // decodeSve2IntMulLong
StaticInstPtr
decodeSve2BitPerm(ExtMachInst machInst)
{
    // SVE2 bit permute group: only BGRP (opc == 0x2) is implemented;
    // BEXT (0x0) and BDEP (0x1) are not decoded yet.
    const uint8_t opc = bits(machInst, 11, 10);
    if (opc != 0x2) {
        return new Unknown64(machInst);
    }
    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t size = bits(machInst, 23, 22);
    return decodeSveBinUnpredU<SveBgrp>(size, machInst, zd, zn, zm);
} // decodeSve2BitPerm
StaticInstPtr
decodeSveIntRotImm(ExtMachInst machInst)
{
    // XAR (bitwise exclusive-or and rotate right, immediate).
    RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    // Fix: imm3 is an immediate field, not a register index; the
    // (RegIndex) cast here was a copy-paste from the lines above.
    uint8_t imm3 = bits(machInst, 18, 16);
    // tsize:imm3 jointly encode the element size and the rotate
    // amount: the position of the most significant set bit of tsize
    // selects the element size, the bits below it extend imm3.
    uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19);
    uint8_t esize = 0;
    uint8_t size = 0;
    if (tsize == 0x0) {
        // Unallocated encoding.
        return new Unknown64(machInst);
    } else if (tsize == 0x1) {
        esize = 8; // byte elements, size stays 0
    } else if ((tsize & 0x0E) == 0x2) {
        esize = 16;
        size = 1;
    } else if ((tsize & 0x0C) == 0x4) {
        esize = 32;
        size = 2;
    } else { // tsize bit 3 set: doubleword elements
        esize = 64;
        size = 3;
    }
    // Rotate amount is encoded as (2 * esize) - tsize:imm3.
    unsigned rot_am = 2 * esize - ((tsize << 3) | imm3);
    return decodeSveBinImmDestrUnpredU<SveXar>(
        size, machInst, zdn, zm, rot_am);
} // decodeSveIntRotImm
StaticInstPtr
decodeSve2CryptBinConstr(ExtMachInst machInst)
{
    // SVE2 crypto constructive binary operations.
    RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t size = bits(machInst, 23, 22);
    const uint8_t opc = bits(machInst, 10);

    // Only RAX1 (size == 0, opc == 1) is implemented; SM4EKEY
    // (size == 0, opc == 0) and everything else decode to Unknown64.
    if (size == 0x0 && opc == 0x1) {
        return new SveRax1<uint64_t>(machInst, zd, zn, zm);
    }
    return new Unknown64(machInst);
} // decodeSve2CryptBinConstr
StaticInstPtr
decodeSve2WideIntArith(ExtMachInst machInst)
{
    // SVE2 widening integer arithmetic: only the multiply-long group
    // (op0 == 0b11) is decoded so far.
    if (bits(machInst, 14, 13) == 0b11) {
        return decodeSve2IntMulLong(machInst);
    }
    return new Unknown64(machInst);
}
StaticInstPtr
decodeSve2Crypto(ExtMachInst machInst)
{
    // SVE2 crypto extensions: only the constructive binary group
    // (op2 == 0b10) is decoded so far.
    if (bits(machInst, 12, 11) == 0b10) {
        return decodeSve2CryptBinConstr(machInst);
    }
    return new Unknown64(machInst);
}
StaticInstPtr
decodeSveIntLogUnpred(ExtMachInst machInst)
{
@@ -1100,12 +1287,19 @@ namespace Aarch64
decodeSvePermUnpred(ExtMachInst machInst)
{
uint8_t b12_10 = bits(machInst, 12, 10);
if (b12_10 == 0x4) {
if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) {
unsigned size = (unsigned) bits(machInst, 23, 22);
RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
if (b12_10 == 0x4) { // TBL, two sources
return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
} else if (bits(machInst, 10) == 0x1) { // TBX
return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn, zm);
// } else { // TBL, three sources
// TBL, three sources
}
return new Unknown64(machInst);
} else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) {
uint8_t size = bits(machInst, 23, 22);
RegIndex rn = makeSP(
@@ -1450,7 +1644,6 @@ namespace Aarch64
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10);
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
uint8_t size = bits(machInst, 23, 22);
return decodeSveBinConstrPredU<SveSel>(size,
@@ -3936,16 +4129,18 @@ namespace Aarch64
} // decodeSveMemStore
StaticInstPtr
decodeSveMisc(ExtMachInst machInst) {
decodeSveMisc(ExtMachInst machInst)
{
switch(bits(machInst, 13, 10)) {
case 0b0110: {
return decodeSveIntMatMulAdd(machInst);
break;
}
default: {
return new Unknown64(machInst);
break;
}
case 0b0110:
return decodeSveIntMatMulAdd(machInst);
case 0b1100:
case 0b1101:
case 0b1110:
case 0b1111:
return decodeSve2BitPerm(machInst);
default:
return new Unknown64(machInst);
}
return new Unknown64(machInst);
} // decodeSveMisc

View File

@@ -45,7 +45,9 @@ namespace Aarch64
StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst);
StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst);
@@ -71,6 +73,12 @@ namespace Aarch64
StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveClamp(ExtMachInst machInst);
StaticInstPtr decodeSve2Accum(ExtMachInst machInst);
StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst);
StaticInstPtr decodeSve2CryptBinConstr(ExtMachInst machInst);
StaticInstPtr decodeSve2BitPerm(ExtMachInst machInst);
StaticInstPtr decodeSve2IntMulLong(ExtMachInst machInst);
StaticInstPtr decodeSve2WideIntArith(ExtMachInst machInst);
StaticInstPtr decodeSve2Crypto(ExtMachInst machInst);
StaticInstPtr decodeSveIntegerDotProductUnpred(ExtMachInst machInst);
StaticInstPtr decodeSveIntegerDotProductIndexed(ExtMachInst machInst);
@@ -129,10 +137,14 @@ namespace Aarch64
break;
case 0b10:
case 0b11:
if (bits(machInst, 21) == 0b0 && op2 == 0b10) {
if (bits(machInst, 21) == 0b0 && bits(op2, 1) == 0b0) {
return decodeSve2WideIntArith(machInst);
} else if (bits(machInst, 21) == 0b0 && op2 == 0b10) {
return decodeSveMisc(machInst);
} else if (bits(machInst, 21) == 0b0 && op2 == 0b11) {
return decodeSve2Accum(machInst);
} else if (bits(machInst, 21) == 0b1 && bits(machInst, 15, 13) == 0b111) {
return decodeSve2Crypto(machInst);
} else {
return new Unknown64(machInst);
}
@@ -180,7 +192,15 @@ namespace Aarch64
switch (b_15_14) {
case 0x0:
if (b_13) {
return decodeSveIntLogUnpred(machInst);
if (bits(machInst, 11)) {
return decodeSveIntTerUnpred(machInst);
} else {
if (bits(machInst, 10)) {
return decodeSveIntRotImm(machInst);
} else {
return decodeSveIntLogUnpred(machInst);
}
}
} else {
if (!bits(machInst, 30)) {
return decodeSveIntArithUnpred(machInst);
@@ -189,7 +209,7 @@ namespace Aarch64
break;
case 0x1:
if (b_13) {
return new Unknown64(machInst);
return decodeSveIntMulUnpred(machInst);
} else if (b_12) {
return decodeSveStackAlloc(machInst);
} else {

View File

@@ -325,6 +325,28 @@ output header {{
}
}
// Decodes binary with immediate operand, destructive, unpredicated
// SVE instructions, handling unsigned variants only.
template <template <typename T> class Base>
StaticInstPtr
decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst,
        RegIndex dest, RegIndex op1, unsigned immediate)
{
    // Map the size field straight onto the unsigned element type.
    if (size == 0) {
        return new Base<uint8_t>(machInst, dest, op1, immediate);
    } else if (size == 1) {
        return new Base<uint16_t>(machInst, dest, op1, immediate);
    } else if (size == 2) {
        return new Base<uint32_t>(machInst, dest, op1, immediate);
    } else if (size == 3) {
        return new Base<uint64_t>(machInst, dest, op1, immediate);
    }
    return new Unknown64(machInst);
}
// Decodes binary with immediate operand, destructive, predicated (merging)
// SVE instructions, handling unsigned variants only.
template <template <typename T> class Base>
@@ -611,6 +633,44 @@ output header {{
}
}
// Decodes binary, constructive, unpredicated SVE instructions.
// unsigned limited variants
template <template <typename T> class Base>
StaticInstPtr
decodeSveBinUnpredUnsigned(unsigned size, ExtMachInst machInst,
        RegIndex dest, RegIndex op1, RegIndex op2)
{
    // Sizes 1..3 map onto the narrower unsigned source element type
    // (used by the long-multiply decoders); size 0 is unallocated.
    if (size == 1) {
        return new Base<uint8_t>(machInst, dest, op1, op2);
    } else if (size == 2) {
        return new Base<uint16_t>(machInst, dest, op1, op2);
    } else if (size == 3) {
        return new Base<uint32_t>(machInst, dest, op1, op2);
    }
    return new Unknown64(machInst);
}
// Decodes binary, constructive, unpredicated SVE instructions.
// signed limited variants
template <template <typename T> class Base>
StaticInstPtr
decodeSveBinUnpredSigned(unsigned size, ExtMachInst machInst,
        RegIndex dest, RegIndex op1, RegIndex op2)
{
    // Sizes 1..3 map onto the narrower signed source element type
    // (used by the long-multiply decoders); size 0 is unallocated.
    if (size == 1) {
        return new Base<int8_t>(machInst, dest, op1, op2);
    } else if (size == 2) {
        return new Base<int16_t>(machInst, dest, op1, op2);
    } else if (size == 3) {
        return new Base<int32_t>(machInst, dest, op1, op2);
    }
    return new Unknown64(machInst);
}
// Decodes binary, constructive, unpredicated SVE instructions.
// Unsigned instructions only.
template <template <typename T> class Base>
@@ -676,6 +736,25 @@ output header {{
}
}
// Decodes binary, constructive, unpredicated SVE instructions.
// unsigned instructions only, limited variants.
template <template <typename T> class Base>
StaticInstPtr
decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst, RegIndex dest,
        RegIndex op1, RegIndex op2)
{
    // Non-contiguous size mapping used by PMULLB/PMULLT: size 2 has
    // no allocated encoding.
    switch (size) {
      case 0: return new Base<uint64_t>(machInst, dest, op1, op2);
      case 1: return new Base<uint8_t>(machInst, dest, op1, op2);
      case 3: return new Base<uint32_t>(machInst, dest, op1, op2);
    }
    return new Unknown64(machInst);
}
// Decodes binary, costructive, unpredicated SVE instructions, handling
// floating-point variants only.
template <template <typename T> class Base>
@@ -1934,8 +2013,7 @@ let {{
def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
isDestructive=False, customIterCode=None,
decoder='Generic'):
assert not (predType in (PredType.NONE, PredType.SELECT) and
isDestructive)
assert not ((predType == PredType.SELECT) and isDestructive)
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -1950,7 +2028,12 @@ let {{
code += '''
const Element& srcElem1 = AA64FpOp1_x[i];'''
code += '''
const Element& srcElem2 = AA64FpOp2_x[i];
const Element& srcElem2 = AA64FpOp2_x[i];'''
if (predType == PredType.NONE) and isDestructive:
code += '''
Element destElem = AA64FpDestMerge_x[i];'''
else:
code += '''
Element destElem = 0;'''
if predType != PredType.NONE:
code += '''
@@ -2762,8 +2845,8 @@ let {{
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)
# Generate definitions for SVE TBL instructions
def sveTblInst(name, Name, opClass, decoder = 'Generic'):
# Generate definitions for SVE table lookup instructions with 2 sources
def sveTblInst(name, Name, opClass, decoder = 'Generic', merging = False):
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
@@ -2774,10 +2857,10 @@ let {{
if (idx < eCount) {
val = AA64FpOp1_x[idx];
} else {
val = 0;
val = %(dest_elem)s;;
}
AA64FpDest_x[i] = val;
}'''
}''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'}
iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp',
{'code': code, 'op_class': opClass}, [])
header_output += SveBinUnpredOpDeclare.subst(iop)
@@ -2787,6 +2870,63 @@ let {{
'class_name' : 'Sve' + Name}
exec_output += SveOpExecDeclare.subst(substDict)
# Generate definitions for integer add/subtract long with carry
# (ADCLB/ADCLT/SBCLB/SBCLT).
#
# name/Name: mnemonic and instruction-class stem; opClass: op class.
# uptTop: read the first source from odd ("top") elements instead of
#     even ("bottom") ones.
# subtract: bitwise-invert the first source, turning the
#     add-with-carry into a subtract-with-borrow.
#
# The generated loop works on element pairs: the carry-in is bit 0 of
# the odd element of the second source, the even dest element takes
# the truncated sum, and the odd dest element gets the carry-out
# (1 when the 128-bit sum does not fit in Element).
def sveLongCarryInst(name, Name, opClass, decoder = 'Generic',
        uptTop = False, subtract = False):
    global header_output, exec_output, decoders
    code = sveEnabledCheckCode + '''
    unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
        xc->tcBase());
    for (int i = 0; i < eCount/2; ++i) {
        const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
        const Element& srcElem2 = AA64FpOp2_x[2*i+1];
        const Element& srcElem3 = AA64FpDestMerge_x[2*i];
        __uint128_t unsigned_sum = (__uint128_t)srcElem3 +
            (%(op)ssrcElem1) +
            (srcElem2 & 0x1);
        AA64FpDest_x[2*i] = (Element)unsigned_sum;
        AA64FpDest_x[2*i+1] = (Element)unsigned_sum !=
            (__uint128_t)unsigned_sum;
    }
    ''' % {'offset': 1 if uptTop else 0,
           'op': '~' if subtract else '',
           }
    # Declare and define the op, then instantiate the execute methods
    # for the two element sizes these instructions are generated for.
    iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
            {'code': code, 'op_class': opClass}, [])
    header_output += SveBinUnpredOpDeclare.subst(iop)
    exec_output += SveOpExecute.subst(iop)
    for type in ('uint32_t', 'uint64_t'):
        substDict = {'targs' : type,
                     'class_name' : 'Sve' + Name}
        exec_output += SveOpExecDeclare.subst(substDict)
# Generate definitions for long integer/poly multiplication
# instructions (SMULLB/T, UMULLB/T, PMULLB/T).
#
# types: source element types to instantiate execute methods for.
# op: C++ snippet that must declare `destElem` (a type at least twice
#     as wide as Element) computed from srcElem1/srcElem2.
# uptTop: read both sources from odd ("top") elements instead of even
#     ("bottom") ones.
#
# Each pair of Element-sized dest lanes receives the widened product:
# the even lane takes the low half, the odd lane the high half.
def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic',
        uptTop = False):
    global header_output, exec_output, decoders
    code = sveEnabledCheckCode + '''
    unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
        xc->tcBase());
    for (int i = 0; i < eCount/2; ++i) {
        const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
        const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s];
        %(op)s
        AA64FpDest_x[2*i] = (Element)destElem;
        AA64FpDest_x[2*i+1] = (Element)(destElem >>
            (sizeof(Element) << 3));
    }
    ''' % {'offset': 1 if uptTop else 0,
           'op': op,
           }
    iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
            {'code': code, 'op_class': opClass}, [])
    header_output += SveBinUnpredOpDeclare.subst(iop)
    exec_output += SveOpExecute.subst(iop)
    # One execute-method instantiation per requested source type.
    for type in types:
        substDict = {'targs' : type,
                     'class_name' : 'Sve' + Name}
        exec_output += SveOpExecDeclare.subst(substDict)
# Generate definitions for SVE Unpack instructions
def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
regType, decoder = 'Generic'):
@@ -3352,6 +3492,10 @@ let {{
absCode = 'destElem = (Element) std::abs(srcElem1);'
sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
PredType.MERGE)
# ADCLB (add with carry long, bottom): first source read from even
# elements.
sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp')
# ADCLT (add with carry long, top): first source read from odd
# elements.
sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp', uptTop = True)
# ADD (immediate)
sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
# ADD (vectors, predicated)
@@ -3457,6 +3601,29 @@ let {{
'''
sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
PredType.MERGE, True)
# BCAX (bitwise clear and exclusive-or): destructive, so destElem
# starts as the destination value; result is dest ^ (op1 & ~op2).
bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);'
sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode,
        isDestructive=True)
# BGRP
bgrpCode = '''
int k = 0;
int len = sizeof(Element) * 8;
for(int j = 0; j < len; j++) {
if(((srcElem2>>j) & (Element)0x1) == ((Element)0x1)){
destElem |= (((srcElem1>>j) & (Element)0x1) << k);
k++;
}
}
k = len-1;
for(int j = len-1; j >= 0; j--) {
if(((srcElem2>>j) & ((Element)0x1)) == ((Element)0x0)){
destElem |= (((srcElem1>>j) & (Element)0x1) << k);
k--;
}
}
'''
sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode)
# BIC (vectors, predicated)
bicCode = 'destElem = srcElem1 & ~srcElem2;'
sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
@@ -3740,6 +3907,10 @@ let {{
eorCode)
svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
eorCode, isFlagSetting=True)
# EOR3: destructive three-way exclusive-or, dest ^ op1 ^ op2.
# NOTE(review): this rebinds `eorCode`, which is also used above for
# the predicated EOR — confirm nothing later relies on the old value.
# NOTE(review): the mnemonic is passed as 'eor', so disassembly will
# print "eor" rather than "eor3" — confirm intended.
eorCode = 'destElem ^= srcElem1 ^ srcElem2;'
sveBinInst('eor', 'Eor3', 'SimdAluOp', ('uint64_t',), eorCode,
        isDestructive=True)
# EORV
eorvCode = 'destElem ^= srcElem1;'
sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
@@ -4355,6 +4526,30 @@ let {{
pfalseCode)
# PFIRST
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
# PMUL
exec_output += '''
__uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2)
{
__uint128_t destElem = 0;
__uint128_t extendedElem2 = srcElem2;
int i;
for (i=0; i < 64; i++) {
if (((srcElem1 >> i) & 0x1) == 0x1) {
destElem ^= (extendedElem2 << i);
}
}
return destElem;
}'''
pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);'
sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode)
# PMULLB
pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);'
sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp',
('uint8_t','uint32_t','uint64_t',), pmullCode)
# PMULLT
sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp',
('uint8_t','uint32_t','uint64_t',),
pmullCode, uptTop = True)
# PNEXT
svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
# PSEL
@@ -4371,6 +4566,9 @@ let {{
# PUNPKLO
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
# RAX1: rotate the second source left by one and exclusive-or it with
# the first source (64-bit elements only).
# NOTE(review): the mnemonic is passed as 'rax', so disassembly will
# print "rax" rather than "rax1" — confirm intended.
rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));'
sveBinInst('rax', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code)
# RBIT
rbitCode = '''
destElem = reverseBits(srcElem1);'''
@@ -4447,6 +4645,11 @@ let {{
'''
sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
PredType.MERGE, True)
# SBCLB (subtract with carry long, bottom): inverted first source,
# even elements.
sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp', subtract = True)
# SBCLT (subtract with carry long, top): inverted first source, odd
# elements.
sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp', uptTop = True,
        subtract = True)
# SADDV
addvCode = 'destElem += srcElem1;'
sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
@@ -4608,6 +4811,13 @@ let {{
destElem = do_mulh(srcElem1, srcElem2);'''
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
PredType.MERGE, True)
# SMULLB/SMULLT: signed multiply long. The (int64_t) casts widen the
# operands so the full double-width product is computed; the generator
# splits it across an even/odd destination lane pair.
smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;'
sveLongMulInst('smullb', 'Smullb', 'SimdAluOp',
        ('int8_t','int16_t','int32_t',), smullCode)
# SMULLT: same operation on the top (odd) source elements.
sveLongMulInst('smullt', 'Smullt', 'SimdAluOp',
        ('int8_t','int16_t','int32_t',), smullCode, uptTop = True)
# SPLICE
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
# SQADD (immediate)
@@ -4793,6 +5003,8 @@ let {{
sxtCode, PredType.MERGE)
# TBL
sveTblInst('tbl', 'Tbl', 'SimdAluOp')
# TBX
sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging=True)
# TRN1, TRN2 (predicates)
trnPredIterCode = '''
constexpr unsigned sz = sizeof(Element);
@@ -4909,6 +5121,14 @@ let {{
# UMULH
sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
PredType.MERGE, True)
# UMULLB/UMULLT: unsigned multiply long. The (uint64_t) casts widen
# the operands so the full double-width product is computed; the
# generator splits it across an even/odd destination lane pair.
umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;'
sveLongMulInst('umullb', 'Umullb', 'SimdAluOp',
        ('uint8_t','uint16_t','uint32_t',), umullCode)
# UMULLT: same operation on the top (odd) source elements.
sveLongMulInst('umullt', 'Umullt', 'SimdAluOp',
        ('uint8_t','uint16_t','uint32_t',), umullCode,
        uptTop = True)
# UQADD (immediate)
uqaddCode = '''
destElem = srcElem1 + srcElem2;
@@ -5129,6 +5349,13 @@ let {{
Ffr_ub[i] = POp1_ub[i];
}'''
svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
# XAR
xarCode = '''
destElem = AA64FpDestMerge_x[i] ^ srcElem1;
destElem = ((destElem >> srcElem2) |
(destElem << (sizeof(Element) * 8 - srcElem2)));
'''
sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode)
# ZIP1, ZIP2 (predicates)
zipPredIterCode = '''
constexpr unsigned sz = sizeof(Element);