arch-arm: Partial SVE2 Implementation (#657)
Instructions added: BGRP, RAX1, EOR3, BCAX, XAR, TBX, PMUL, PMULLB/T, SMULLB/T and UMULLB/T. Moved from Gerrit [1]. [1]: https://gem5-review.googlesource.com/c/public/gem5/+/70277 Change-Id: Ia135ba9300eae312b24342bcbda835fef6867113
This commit is contained in:
@@ -568,6 +568,193 @@ namespace Aarch64
|
||||
return new Unknown64(machInst);
|
||||
} // decodeSveIntArithUnpred
|
||||
|
||||
StaticInstPtr
decodeSveIntMulUnpred(ExtMachInst machInst)
{
    // Only PMUL (opc == 0b01 with size == 0b00) is decoded in this group.
    // MUL (opc 0b00), SMULH (opc 0b10) and UMULH (opc 0b11), as well as
    // opc 0b01 with any other size, decode as Unknown64.
    const uint8_t opc = bits(machInst, 11, 10);
    const uint8_t size = bits(machInst, 23, 22);
    const bool is_pmul = (opc == 0x1) && (size == 0x0);
    if (!is_pmul) {
        return new Unknown64(machInst);
    }

    const RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    const RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    return new SvePmul<uint8_t>(machInst, zd, zn, zm);
} // decodeSveIntMulUnpred
|
||||
|
||||
StaticInstPtr
decodeSveIntTerUnpred(ExtMachInst machInst)
{
    const RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    const RegIndex zk = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);

    // Selector: bits 23:22 in the upper two positions, bit 10 in the
    // lowest position.
    const uint8_t sel = (bits(machInst, 23, 22) << 1) | bits(machInst, 10);

    if (sel == 0x0) {
        return new SveEor3<uint64_t>(machInst, zdn, zm, zk);
    }
    if (sel == 0x2) {
        return new SveBcax<uint64_t>(machInst, zdn, zm, zk);
    }

    // BSL (0x1), BSL1N (0x3), BSL2N (0x5) and NBSL (0x7) are not
    // implemented; they and every remaining selector value are Unknown64.
    return new Unknown64(machInst);
} // decodeSveIntTerUnpred
|
||||
|
||||
StaticInstPtr
decodeSve2IntMulLong(ExtMachInst machInst)
{
    // Register and size fields are common to every encoding in the group.
    const RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    const RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    const uint8_t size = bits(machInst, 23, 22);

    // Bits 12:10 pick the instruction; each helper maps `size` onto the
    // element types instantiated for that instruction.
    const uint8_t sel = bits(machInst, 12, 10);
    switch (sel) {
      // Polynomial multiply long.
      case 0x2:
        return decodeSveBinUnpredS2<SvePmullb>(size, machInst, zd, zn, zm);
      case 0x3:
        return decodeSveBinUnpredS2<SvePmullt>(size, machInst, zd, zn, zm);

      // Signed multiply long.
      case 0x4:
        return decodeSveBinUnpredSigned<SveSmullb>(
                size, machInst, zd, zn, zm);
      case 0x5:
        return decodeSveBinUnpredSigned<SveSmullt>(
                size, machInst, zd, zn, zm);

      // Unsigned multiply long.
      case 0x6:
        return decodeSveBinUnpredUnsigned<SveUmullb>(
                size, machInst, zd, zn, zm);
      case 0x7:
        return decodeSveBinUnpredUnsigned<SveUmullt>(
                size, machInst, zd, zn, zm);

      // SQDMULLB (0x0) and SQDMULLT (0x1) are not implemented.
      default:
        break;
    }
    return new Unknown64(machInst);
} // decodeSve2IntMulLong
|
||||
|
||||
StaticInstPtr
decodeSve2BitPerm(ExtMachInst machInst)
{
    // BGRP (bits 11:10 == 0b10) is the only instruction decoded here.
    // BEXT (0b00) and BDEP (0b01) are not implemented, and every other
    // value is Unknown64 as well.
    if (bits(machInst, 11, 10) != 0x2) {
        return new Unknown64(machInst);
    }

    const uint8_t size = bits(machInst, 23, 22);
    const RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    const RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    return decodeSveBinUnpredU<SveBgrp>(size, machInst, zd, zn, zm);
} // decodeSve2BitPerm
|
||||
|
||||
StaticInstPtr
decodeSveIntRotImm(ExtMachInst machInst)
{
    RegIndex zdn = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    // imm3 is an immediate field, not a register number, so it is not
    // routed through RegIndex.
    uint8_t imm3 = bits(machInst, 18, 16);

    // tsize is bits 23:22 followed by bits 20:19; the position of its
    // highest set bit selects the element size. An all-zero tsize has no
    // valid element size.
    uint8_t tsize = (bits(machInst, 23, 22) << 2) | bits(machInst, 20, 19);
    if (tsize == 0x0) {
        return new Unknown64(machInst);
    }

    // tsize is a non-zero 4-bit value here, so exactly one branch applies
    // and both variables are always assigned.
    unsigned esize;
    unsigned size;
    if (tsize & 0x8) {          // 1xxx
        esize = 64;
        size = 3;
    } else if (tsize & 0x4) {   // 01xx
        esize = 32;
        size = 2;
    } else if (tsize & 0x2) {   // 001x
        esize = 16;
        size = 1;
    } else {                    // 0001
        esize = 8;
        size = 0;
    }

    // Rotate amount is 2 * esize minus the 7-bit value tsize:imm3, which
    // yields a value in the range [1, esize].
    unsigned rot_am = 2 * esize - ((tsize << 3) | imm3);
    return decodeSveBinImmDestrUnpredU<SveXar>(
            size, machInst, zdn, zm, rot_am);
} // decodeSveIntRotImm
|
||||
|
||||
StaticInstPtr
decodeSve2CryptBinConstr(ExtMachInst machInst)
{
    const uint8_t size = bits(machInst, 23, 22);
    const uint8_t opc = bits(machInst, 10);

    // RAX1 is size == 0b00 with opc == 1. SM4EKEY (size == 0b00, opc == 0)
    // is not implemented, and every other combination is Unknown64 too.
    const bool is_rax1 = (size == 0x0) && (opc == 0x1);
    if (!is_rax1) {
        return new Unknown64(machInst);
    }

    const RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
    const RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
    const RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
    return new SveRax1<uint64_t>(machInst, zd, zn, zm);
} // decodeSve2CryptBinConstr
|
||||
|
||||
StaticInstPtr
decodeSve2WideIntArith(ExtMachInst machInst)
{
    // Only the sub-group with bits 14:13 == 0b11 (integer multiply long)
    // is decoded; all other sub-groups are Unknown64.
    const uint8_t op0 = bits(machInst, 14, 13);
    if (op0 == 0b11) {
        return decodeSve2IntMulLong(machInst);
    }
    return new Unknown64(machInst);
}
|
||||
|
||||
StaticInstPtr
decodeSve2Crypto(ExtMachInst machInst)
{
    // Only the sub-group with bits 12:11 == 0b10 (handled by
    // decodeSve2CryptBinConstr) is decoded; the rest are Unknown64.
    const uint8_t op2 = bits(machInst, 12, 11);
    if (op2 == 0b10) {
        return decodeSve2CryptBinConstr(machInst);
    }
    return new Unknown64(machInst);
}
|
||||
|
||||
StaticInstPtr
|
||||
decodeSveIntLogUnpred(ExtMachInst machInst)
|
||||
{
|
||||
@@ -1100,12 +1287,19 @@ namespace Aarch64
|
||||
decodeSvePermUnpred(ExtMachInst machInst)
|
||||
{
|
||||
uint8_t b12_10 = bits(machInst, 12, 10);
|
||||
if (b12_10 == 0x4) {
|
||||
if ((b12_10 == 0x4) || (bits(machInst, 12, 11) == 0x1)) {
|
||||
unsigned size = (unsigned) bits(machInst, 23, 22);
|
||||
RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
|
||||
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
|
||||
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
|
||||
return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
|
||||
if (b12_10 == 0x4) { // TBL, two sources
|
||||
return decodeSveBinUnpredU<SveTbl>(size, machInst, zd, zn, zm);
|
||||
} else if (bits(machInst, 10) == 0x1) { // TBX
|
||||
return decodeSveBinUnpredU<SveTbx>(size, machInst, zd, zn, zm);
|
||||
// } else { // TBL, three sources
|
||||
// TBL, three sources
|
||||
}
|
||||
return new Unknown64(machInst);
|
||||
} else if (bits(machInst, 20, 16) == 0x0 && b12_10 == 0x6) {
|
||||
uint8_t size = bits(machInst, 23, 22);
|
||||
RegIndex rn = makeSP(
|
||||
@@ -1450,7 +1644,6 @@ namespace Aarch64
|
||||
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
|
||||
RegIndex pg = (RegIndex) (uint8_t) bits(machInst, 13, 10);
|
||||
RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
|
||||
|
||||
uint8_t size = bits(machInst, 23, 22);
|
||||
|
||||
return decodeSveBinConstrPredU<SveSel>(size,
|
||||
@@ -3936,16 +4129,18 @@ namespace Aarch64
|
||||
} // decodeSveMemStore
|
||||
|
||||
StaticInstPtr
|
||||
decodeSveMisc(ExtMachInst machInst) {
|
||||
decodeSveMisc(ExtMachInst machInst)
|
||||
{
|
||||
switch(bits(machInst, 13, 10)) {
|
||||
case 0b0110: {
|
||||
return decodeSveIntMatMulAdd(machInst);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return new Unknown64(machInst);
|
||||
break;
|
||||
}
|
||||
case 0b0110:
|
||||
return decodeSveIntMatMulAdd(machInst);
|
||||
case 0b1100:
|
||||
case 0b1101:
|
||||
case 0b1110:
|
||||
case 0b1111:
|
||||
return decodeSve2BitPerm(machInst);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
return new Unknown64(machInst);
|
||||
} // decodeSveMisc
|
||||
|
||||
@@ -45,7 +45,9 @@ namespace Aarch64
|
||||
StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntMulUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntTerUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveStackAlloc(ExtMachInst machInst);
|
||||
@@ -71,6 +73,12 @@ namespace Aarch64
|
||||
StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveClamp(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2Accum(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntRotImm(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2CryptBinConstr(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2BitPerm(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2IntMulLong(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2WideIntArith(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSve2Crypto(ExtMachInst machInst);
|
||||
|
||||
StaticInstPtr decodeSveIntegerDotProductUnpred(ExtMachInst machInst);
|
||||
StaticInstPtr decodeSveIntegerDotProductIndexed(ExtMachInst machInst);
|
||||
@@ -129,10 +137,14 @@ namespace Aarch64
|
||||
break;
|
||||
case 0b10:
|
||||
case 0b11:
|
||||
if (bits(machInst, 21) == 0b0 && op2 == 0b10) {
|
||||
if (bits(machInst, 21) == 0b0 && bits(op2, 1) == 0b0) {
|
||||
return decodeSve2WideIntArith(machInst);
|
||||
} else if (bits(machInst, 21) == 0b0 && op2 == 0b10) {
|
||||
return decodeSveMisc(machInst);
|
||||
} else if (bits(machInst, 21) == 0b0 && op2 == 0b11) {
|
||||
return decodeSve2Accum(machInst);
|
||||
} else if (bits(machInst, 21) == 0b1 && bits(machInst, 15, 13) == 0b111) {
|
||||
return decodeSve2Crypto(machInst);
|
||||
} else {
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
@@ -180,7 +192,15 @@ namespace Aarch64
|
||||
switch (b_15_14) {
|
||||
case 0x0:
|
||||
if (b_13) {
|
||||
return decodeSveIntLogUnpred(machInst);
|
||||
if (bits(machInst, 11)) {
|
||||
return decodeSveIntTerUnpred(machInst);
|
||||
} else {
|
||||
if (bits(machInst, 10)) {
|
||||
return decodeSveIntRotImm(machInst);
|
||||
} else {
|
||||
return decodeSveIntLogUnpred(machInst);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!bits(machInst, 30)) {
|
||||
return decodeSveIntArithUnpred(machInst);
|
||||
@@ -189,7 +209,7 @@ namespace Aarch64
|
||||
break;
|
||||
case 0x1:
|
||||
if (b_13) {
|
||||
return new Unknown64(machInst);
|
||||
return decodeSveIntMulUnpred(machInst);
|
||||
} else if (b_12) {
|
||||
return decodeSveStackAlloc(machInst);
|
||||
} else {
|
||||
|
||||
@@ -325,6 +325,28 @@ output header {{
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Decodes binary with immediate operand, destructive, unpredicated
|
||||
// SVE instructions, handling unsigned variants only.
|
||||
template <template <typename T> class Base>
|
||||
StaticInstPtr
|
||||
decodeSveBinImmDestrUnpredU(unsigned size, ExtMachInst machInst,
|
||||
RegIndex dest, RegIndex op1, unsigned immediate)
|
||||
{
|
||||
switch (size) {
|
||||
case 0:
|
||||
return new Base<uint8_t>(machInst, dest, op1, immediate);
|
||||
case 1:
|
||||
return new Base<uint16_t>(machInst, dest, op1, immediate);
|
||||
case 2:
|
||||
return new Base<uint32_t>(machInst, dest, op1, immediate);
|
||||
case 3:
|
||||
return new Base<uint64_t>(machInst, dest, op1, immediate);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary with immediate operand, destructive, predicated (merging)
|
||||
// SVE instructions, handling unsigned variants only.
|
||||
template <template <typename T> class Base>
|
||||
@@ -611,6 +633,44 @@ output header {{
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary, constructive, unpredicated SVE instructions.
|
||||
// unsigned limited variants
|
||||
template <template <typename T> class Base>
|
||||
StaticInstPtr
|
||||
decodeSveBinUnpredUnsigned(unsigned size, ExtMachInst machInst,
|
||||
RegIndex dest, RegIndex op1, RegIndex op2)
|
||||
{
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new Base<uint8_t>(machInst, dest, op1, op2);
|
||||
case 2:
|
||||
return new Base<uint16_t>(machInst, dest, op1, op2);
|
||||
case 3:
|
||||
return new Base<uint32_t>(machInst, dest, op1, op2);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary, constructive, unpredicated SVE instructions.
|
||||
// signed limited variants
|
||||
template <template <typename T> class Base>
|
||||
StaticInstPtr
|
||||
decodeSveBinUnpredSigned(unsigned size, ExtMachInst machInst,
|
||||
RegIndex dest, RegIndex op1, RegIndex op2)
|
||||
{
|
||||
switch (size) {
|
||||
case 1:
|
||||
return new Base<int8_t>(machInst, dest, op1, op2);
|
||||
case 2:
|
||||
return new Base<int16_t>(machInst, dest, op1, op2);
|
||||
case 3:
|
||||
return new Base<int32_t>(machInst, dest, op1, op2);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary, constructive, unpredicated SVE instructions.
|
||||
// Unsigned instructions only.
|
||||
template <template <typename T> class Base>
|
||||
@@ -676,6 +736,25 @@ output header {{
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary, constructive, unpredicated SVE instructions.
|
||||
// unsigned instructions only, limited variants.
|
||||
template <template <typename T> class Base>
|
||||
StaticInstPtr
|
||||
decodeSveBinUnpredS2(unsigned size, ExtMachInst machInst, RegIndex dest,
|
||||
RegIndex op1, RegIndex op2)
|
||||
{
|
||||
switch (size) {
|
||||
case 0:
|
||||
return new Base<uint64_t>(machInst, dest, op1, op2);
|
||||
case 1:
|
||||
return new Base<uint8_t>(machInst, dest, op1, op2);
|
||||
case 3:
|
||||
return new Base<uint32_t>(machInst, dest, op1, op2);
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes binary, constructive, unpredicated SVE instructions, handling
|
||||
// floating-point variants only.
|
||||
template <template <typename T> class Base>
|
||||
@@ -1934,8 +2013,7 @@ let {{
|
||||
def sveBinInst(name, Name, opClass, types, op, predType=PredType.NONE,
|
||||
isDestructive=False, customIterCode=None,
|
||||
decoder='Generic'):
|
||||
assert not (predType in (PredType.NONE, PredType.SELECT) and
|
||||
isDestructive)
|
||||
assert not ((predType == PredType.SELECT) and isDestructive)
|
||||
global header_output, exec_output, decoders
|
||||
code = sveEnabledCheckCode + '''
|
||||
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
|
||||
@@ -1950,7 +2028,12 @@ let {{
|
||||
code += '''
|
||||
const Element& srcElem1 = AA64FpOp1_x[i];'''
|
||||
code += '''
|
||||
const Element& srcElem2 = AA64FpOp2_x[i];
|
||||
const Element& srcElem2 = AA64FpOp2_x[i];'''
|
||||
if (predType == PredType.NONE) and isDestructive:
|
||||
code += '''
|
||||
Element destElem = AA64FpDestMerge_x[i];'''
|
||||
else:
|
||||
code += '''
|
||||
Element destElem = 0;'''
|
||||
if predType != PredType.NONE:
|
||||
code += '''
|
||||
@@ -2762,8 +2845,8 @@ let {{
|
||||
'class_name' : 'Sve' + Name}
|
||||
exec_output += SveOpExecDeclare.subst(substDict)
|
||||
|
||||
# Generate definitions for SVE TBL instructions
|
||||
def sveTblInst(name, Name, opClass, decoder = 'Generic'):
|
||||
# Generate definitions for SVE table lookup instructions with 2 sources
|
||||
def sveTblInst(name, Name, opClass, decoder = 'Generic', merging = False):
|
||||
global header_output, exec_output, decoders
|
||||
code = sveEnabledCheckCode + '''
|
||||
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
|
||||
@@ -2774,10 +2857,10 @@ let {{
|
||||
if (idx < eCount) {
|
||||
val = AA64FpOp1_x[idx];
|
||||
} else {
|
||||
val = 0;
|
||||
val = %(dest_elem)s;;
|
||||
}
|
||||
AA64FpDest_x[i] = val;
|
||||
}'''
|
||||
}''' % {'dest_elem': 'AA64FpDestMerge_x[i]' if merging else '0'}
|
||||
iop = ArmInstObjParams(name, 'Sve' + Name, 'SveTblOp',
|
||||
{'code': code, 'op_class': opClass}, [])
|
||||
header_output += SveBinUnpredOpDeclare.subst(iop)
|
||||
@@ -2787,6 +2870,63 @@ let {{
|
||||
'class_name' : 'Sve' + Name}
|
||||
exec_output += SveOpExecDeclare.subst(substDict)
|
||||
|
||||
# Generate definitions for integer add/subtract long with carry
def sveLongCarryInst(name, Name, opClass, decoder = 'Generic',
                     uptTop = False, subtract = False):
    global header_output, exec_output, decoders
    # Values spliced into the C++ loop below:
    #   offset - 1 reads the odd element of each pair of the first source
    #            (uptTop), 0 reads the even element;
    #   op     - '~' inverts the first source element (subtract), '' leaves
    #            it unchanged.
    substitutions = {
        'offset': 1 if uptTop else 0,
        'op': '~' if subtract else '',
    }
    # For each element pair the sum of the even destination element, the
    # (optionally inverted) first source element and bit 0 of the odd second
    # source element is written to the even destination element; the odd
    # destination element is set to 1 if the sum did not fit in Element,
    # else 0.
    loopCode = '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (int i = 0; i < eCount/2; ++i) {
            const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
            const Element& srcElem2 = AA64FpOp2_x[2*i+1];
            const Element& srcElem3 = AA64FpDestMerge_x[2*i];
            __uint128_t unsigned_sum = (__uint128_t)srcElem3 +
                                       (%(op)ssrcElem1) +
                                       (srcElem2 & 0x1);
            AA64FpDest_x[2*i] = (Element)unsigned_sum;
            AA64FpDest_x[2*i+1] = (Element)unsigned_sum !=
                                  (__uint128_t)unsigned_sum;
        }
    ''' % substitutions
    code = sveEnabledCheckCode + loopCode
    iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
                           {'code': code, 'op_class': opClass}, [])
    header_output += SveBinUnpredOpDeclare.subst(iop)
    exec_output += SveOpExecute.subst(iop)
    # Explicit instantiations for the two element types.
    for elemType in ('uint32_t', 'uint64_t'):
        exec_output += SveOpExecDeclare.subst(
            {'targs' : elemType, 'class_name' : 'Sve' + Name})
|
||||
|
||||
# Generate definitions for long integer/poly multiplication instruction
def sveLongMulInst(name, Name, opClass, types, op, decoder = 'Generic',
                   uptTop = False):
    global header_output, exec_output, decoders
    # `op` is C++ source that must declare and compute `destElem` from
    # srcElem1 and srcElem2. With uptTop the odd element of each source pair
    # is read, otherwise the even element.
    substitutions = {
        'offset': 1 if uptTop else 0,
        'op': op,
    }
    # The product is stored across one element pair: its low
    # sizeof(Element) * 8 bits in the even element and the next bits in the
    # odd element.
    loopCode = '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        for (int i = 0; i < eCount/2; ++i) {
            const Element& srcElem1 = AA64FpOp1_x[2*i+%(offset)s];
            const Element& srcElem2 = AA64FpOp2_x[2*i+%(offset)s];
            %(op)s
            AA64FpDest_x[2*i] = (Element)destElem;
            AA64FpDest_x[2*i+1] = (Element)(destElem >>
                                            (sizeof(Element) << 3));
        }
    ''' % substitutions
    code = sveEnabledCheckCode + loopCode
    iop = ArmInstObjParams(name, 'Sve' + Name, 'SveBinUnpredOp',
                           {'code': code, 'op_class': opClass}, [])
    header_output += SveBinUnpredOpDeclare.subst(iop)
    exec_output += SveOpExecute.subst(iop)
    # One explicit instantiation per requested element type.
    for elemType in types:
        exec_output += SveOpExecDeclare.subst(
            {'targs' : elemType, 'class_name' : 'Sve' + Name})
|
||||
|
||||
# Generate definitions for SVE Unpack instructions
|
||||
def sveUnpackInst(name, Name, opClass, sdtypes, unpackHalf,
|
||||
regType, decoder = 'Generic'):
|
||||
@@ -3352,6 +3492,10 @@ let {{
|
||||
absCode = 'destElem = (Element) std::abs(srcElem1);'
|
||||
sveUnaryInst('abs', 'Abs', 'SimdAluOp', signedTypes, absCode,
|
||||
PredType.MERGE)
|
||||
# ADCLB
|
||||
sveLongCarryInst('adclb', 'Adclb', 'SimdAluOp')
|
||||
# ADCLT
|
||||
sveLongCarryInst('adclt', 'Adclt', 'SimdAluOp', uptTop = True)
|
||||
# ADD (immediate)
|
||||
sveWideImmInst('add', 'AddImm', 'SimdAddOp', unsignedTypes, addCode, False)
|
||||
# ADD (vectors, predicated)
|
||||
@@ -3457,6 +3601,29 @@ let {{
|
||||
'''
|
||||
sveBinInst('asrr', 'Asrr', 'SimdAluOp', unsignedTypes, asrrCode,
|
||||
PredType.MERGE, True)
|
||||
# BCAX
|
||||
bcaxCode = 'destElem ^= srcElem1 & (~srcElem2);'
|
||||
sveBinInst('bcax', 'Bcax', 'SimdAluOp', ('uint64_t',), bcaxCode,
|
||||
isDestructive=True)
|
||||
# BGRP
|
||||
bgrpCode = '''
|
||||
int k = 0;
|
||||
int len = sizeof(Element) * 8;
|
||||
for(int j = 0; j < len; j++) {
|
||||
if(((srcElem2>>j) & (Element)0x1) == ((Element)0x1)){
|
||||
destElem |= (((srcElem1>>j) & (Element)0x1) << k);
|
||||
k++;
|
||||
}
|
||||
}
|
||||
k = len-1;
|
||||
for(int j = len-1; j >= 0; j--) {
|
||||
if(((srcElem2>>j) & ((Element)0x1)) == ((Element)0x0)){
|
||||
destElem |= (((srcElem1>>j) & (Element)0x1) << k);
|
||||
k--;
|
||||
}
|
||||
}
|
||||
'''
|
||||
sveBinInst('bgrp', 'Bgrp', 'SimdAluOp', unsignedTypes, bgrpCode)
|
||||
# BIC (vectors, predicated)
|
||||
bicCode = 'destElem = srcElem1 & ~srcElem2;'
|
||||
sveBinInst('bic', 'BicPred', 'SimdAluOp', unsignedTypes, bicCode,
|
||||
@@ -3740,6 +3907,10 @@ let {{
|
||||
eorCode)
|
||||
svePredLogicalInst('eors', 'PredEors', 'SimdPredAluOp', ('uint8_t',),
|
||||
eorCode, isFlagSetting=True)
|
||||
# EOR3
# Three-way exclusive OR: the destination element is XORed with both source
# elements. A dedicated variable is used so that 'eorCode', which the EOR/EORS
# definitions above rely on, is not overwritten. The name argument is 'eor3'
# to follow sibling definitions, where it matches the instruction mnemonic.
eor3Code = 'destElem ^= srcElem1 ^ srcElem2;'
sveBinInst('eor3', 'Eor3', 'SimdAluOp', ('uint64_t',), eor3Code,
           isDestructive=True)
|
||||
# EORV
|
||||
eorvCode = 'destElem ^= srcElem1;'
|
||||
sveAssocReducInst('eorv', 'Eorv', 'SimdReduceAluOp', unsignedTypes,
|
||||
@@ -4355,6 +4526,30 @@ let {{
|
||||
pfalseCode)
|
||||
# PFIRST
|
||||
svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
|
||||
# PMUL
|
||||
exec_output += '''
|
||||
__uint128_t poly_mul(uint64_t srcElem1, uint64_t srcElem2)
|
||||
{
|
||||
__uint128_t destElem = 0;
|
||||
__uint128_t extendedElem2 = srcElem2;
|
||||
int i;
|
||||
for (i=0; i < 64; i++) {
|
||||
if (((srcElem1 >> i) & 0x1) == 0x1) {
|
||||
destElem ^= (extendedElem2 << i);
|
||||
}
|
||||
}
|
||||
return destElem;
|
||||
}'''
|
||||
pmulCode = 'destElem = (uint8_t)poly_mul(srcElem1, srcElem2);'
|
||||
sveBinInst('pmul', 'Pmul', 'SimdAluOp', ('uint8_t',), pmulCode)
|
||||
# PMULLB
|
||||
pmullCode = '__uint128_t destElem = poly_mul(srcElem1, srcElem2);'
|
||||
sveLongMulInst('pmullb', 'Pmullb', 'SimdAluOp',
|
||||
('uint8_t','uint32_t','uint64_t',), pmullCode)
|
||||
# PMULLT
|
||||
sveLongMulInst('pmullt', 'Pmullt', 'SimdAluOp',
|
||||
('uint8_t','uint32_t','uint64_t',),
|
||||
pmullCode, uptTop = True)
|
||||
# PNEXT
|
||||
svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
|
||||
# PSEL
|
||||
@@ -4371,6 +4566,9 @@ let {{
|
||||
# PUNPKLO
|
||||
sveUnpackInst('punpklo', 'Punpklo', 'SimdPredAluOp', unsignedWideSDTypes,
|
||||
unpackHalf = Unpack.Low, regType = SrcRegType.Predicate)
|
||||
# RAX1
# XORs the first source element with the second source element rotated left
# by one bit (64-bit elements only). The name argument is 'rax1' to follow
# sibling definitions, where it matches the instruction mnemonic.
rax1Code = 'destElem = srcElem1 ^ ((srcElem2 << 1) | (srcElem2 >> 63));'
sveBinInst('rax1', 'Rax1', 'SimdAluOp', ('uint64_t',), rax1Code)
|
||||
# RBIT
|
||||
rbitCode = '''
|
||||
destElem = reverseBits(srcElem1);'''
|
||||
@@ -4447,6 +4645,11 @@ let {{
|
||||
'''
|
||||
sveBinInst('sabd', 'Sabd', 'SimdAddOp', signedTypes, abdCode,
|
||||
PredType.MERGE, True)
|
||||
# SBCLB
|
||||
sveLongCarryInst('sbclb', 'Sbclb', 'SimdAluOp', subtract = True)
|
||||
# SBCLT
|
||||
sveLongCarryInst('sbclt', 'Sbclt', 'SimdAluOp', uptTop = True,
|
||||
subtract = True)
|
||||
# SADDV
|
||||
addvCode = 'destElem += srcElem1;'
|
||||
sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
|
||||
@@ -4608,6 +4811,13 @@ let {{
|
||||
destElem = do_mulh(srcElem1, srcElem2);'''
|
||||
sveBinInst('smulh', 'Smulh', 'SimdMultOp', signedTypes, mulhCode,
|
||||
PredType.MERGE, True)
|
||||
# SMULLB
|
||||
smullCode = 'int64_t destElem = (int64_t)srcElem1 * (int64_t)srcElem2;'
|
||||
sveLongMulInst('smullb', 'Smullb', 'SimdAluOp',
|
||||
('int8_t','int16_t','int32_t',), smullCode)
|
||||
# SMULLT
|
||||
sveLongMulInst('smullt', 'Smullt', 'SimdAluOp',
|
||||
('int8_t','int16_t','int32_t',), smullCode, uptTop = True)
|
||||
# SPLICE
|
||||
sveSpliceInst('splice', 'Splice', 'SimdAluOp', unsignedTypes)
|
||||
# SQADD (immediate)
|
||||
@@ -4793,6 +5003,8 @@ let {{
|
||||
sxtCode, PredType.MERGE)
|
||||
# TBL
|
||||
sveTblInst('tbl', 'Tbl', 'SimdAluOp')
|
||||
# TBX
|
||||
sveTblInst('tbx', 'Tbx', 'SimdAluOp', merging=True)
|
||||
# TRN1, TRN2 (predicates)
|
||||
trnPredIterCode = '''
|
||||
constexpr unsigned sz = sizeof(Element);
|
||||
@@ -4909,6 +5121,14 @@ let {{
|
||||
# UMULH
|
||||
sveBinInst('umulh', 'Umulh', 'SimdMultOp', unsignedTypes, mulhCode,
|
||||
PredType.MERGE, True)
|
||||
# UMULLB
|
||||
umullCode = 'uint64_t destElem = (uint64_t)srcElem1 * (uint64_t)srcElem2;'
|
||||
sveLongMulInst('umullb', 'Umullb', 'SimdAluOp',
|
||||
('uint8_t','uint16_t','uint32_t',), umullCode)
|
||||
# UMULLT
|
||||
sveLongMulInst('umullt', 'Umullt', 'SimdAluOp',
|
||||
('uint8_t','uint16_t','uint32_t',), umullCode,
|
||||
uptTop = True)
|
||||
# UQADD (immediate)
|
||||
uqaddCode = '''
|
||||
destElem = srcElem1 + srcElem2;
|
||||
@@ -5129,6 +5349,13 @@ let {{
|
||||
Ffr_ub[i] = POp1_ub[i];
|
||||
}'''
|
||||
svePredWriteFfrInst('wrffr', 'Wrffr', 'SimdPredAluOp', wrffrCode, False)
|
||||
# XAR
# XORs the destination element with the source element, then rotates the
# result right by the amount held in srcElem2.
# decodeSveIntRotImm produces a rotate amount in [1, element width]. Shifting
# a 32- or 64-bit element by its full width is undefined behaviour in C++, so
# the amount is reduced modulo the element width and a zero rotate (which is
# what a full-width rotate amounts to) skips the shifts entirely.
xarCode = '''
        constexpr unsigned xarBits = sizeof(Element) * 8;
        const unsigned xarRot = srcElem2 % xarBits;
        destElem = AA64FpDestMerge_x[i] ^ srcElem1;
        if (xarRot != 0) {
            destElem = ((destElem >> xarRot) |
                        (destElem << (xarBits - xarRot)));
        }
'''
sveBinImmInst('xar', 'Xar', 'SimdAluOp', unsignedTypes, xarCode)
|
||||
# ZIP1, ZIP2 (predicates)
|
||||
zipPredIterCode = '''
|
||||
constexpr unsigned sz = sizeof(Element);
|
||||
|
||||
Reference in New Issue
Block a user