arch-arm: This commit adds the mla/s indexed versions
This includes the ISA definitions and the instruction implementations of the indexed MLA and MLS instructions from the ARM SVE2 ISA specification. Change-Id: I4fbd0382f23d8611e46411f74dc991f5a211a313
This commit is contained in:
@@ -435,6 +435,26 @@ SveTerPredOp::generateDisassembly(
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
std::string
|
||||
SveTerIndexedOp::generateDisassembly(
|
||||
Addr pc, const loader::SymbolTable *symtab) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
printMnemonic(ss, "", false);
|
||||
printVecReg(ss, dest, true);
|
||||
ccprintf(ss, ", ");
|
||||
printVecReg(ss, op1, true);
|
||||
ccprintf(ss, ", ");
|
||||
printVecReg(ss, op2, true);
|
||||
ccprintf(ss, "[");
|
||||
ccprintf(ss, "%lu", imm);
|
||||
ccprintf(ss, "]");
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::string
|
||||
SveTerUnpredOp::generateDisassembly(
|
||||
Addr pc, const loader::SymbolTable *symtab) const
|
||||
|
||||
@@ -498,6 +498,27 @@ class SveTerPredOp : public ArmStaticInst
|
||||
Addr pc, const loader::SymbolTable *symtab) const override;
|
||||
};
|
||||
|
||||
/// Ternary, destructive, unpredicated , !INDEXED! SVE Instruction
|
||||
class SveTerIndexedOp : public ArmStaticInst
|
||||
{
|
||||
protected:
|
||||
RegIndex dest, op1, op2;
|
||||
uint16_t imm;
|
||||
uint8_t esize;
|
||||
|
||||
SveTerIndexedOp(const char* mnem, ExtMachInst _machInst,
|
||||
OpClass __opClass, RegIndex _dest,
|
||||
RegIndex _op1, RegIndex _op2, uint16_t _imm) :
|
||||
ArmStaticInst(mnem, _machInst, __opClass),
|
||||
dest(_dest), op1(_op1), op2(_op2) , imm(_imm)
|
||||
{}
|
||||
|
||||
std::string generateDisassembly(
|
||||
Addr pc, const loader::SymbolTable *symtab) const override;
|
||||
};
|
||||
|
||||
|
||||
|
||||
/// Ternary, destructive, unpredicated SVE instruction.
|
||||
class SveTerUnpredOp : public ArmStaticInst
|
||||
{
|
||||
|
||||
@@ -245,6 +245,59 @@ namespace Aarch64
|
||||
return new Unknown64(machInst);
|
||||
} // decodeSveIntMulAdd
|
||||
|
||||
StaticInstPtr
|
||||
decodeSveMultiplyAccIndexed(ExtMachInst machInst)
|
||||
{
|
||||
RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
|
||||
RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
|
||||
RegIndex zm ;
|
||||
|
||||
//= (RegIndex) (uint8_t) bits(machInst, 19, 16);
|
||||
|
||||
uint8_t size = bits(machInst, 23, 22);
|
||||
uint16_t imm;
|
||||
uint8_t opc = (bits(machInst, 10));
|
||||
|
||||
switch(size) {
|
||||
case 0b00:
|
||||
case 0b01:
|
||||
zm = (RegIndex)(uint8_t)bits(machInst, 18, 16);
|
||||
imm = (uint16_t)(bits(machInst, 22) << 2)
|
||||
| bits(machInst, 20, 19);
|
||||
switch(opc) {
|
||||
case 0x0: return new Sve2Mlai<int16_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
case 0x1: return new Sve2Mlsi<int16_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0b10:
|
||||
zm = (RegIndex)(uint8_t)bits(machInst, 18, 16);
|
||||
imm = (uint16_t)bits(machInst, 20, 19);
|
||||
switch(opc) {
|
||||
case 0x0: return new Sve2Mlai<int32_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
case 0x1: return new Sve2Mlsi<int32_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
}
|
||||
break;
|
||||
|
||||
case 0b11:
|
||||
zm = (RegIndex)(uint8_t)bits(machInst, 19, 16);
|
||||
imm = (uint16_t)bits(machInst, 20);
|
||||
switch(opc) {
|
||||
case 0x0: return new Sve2Mlai<int64_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
case 0x1: return new Sve2Mlsi<int64_t>(
|
||||
machInst, zda, zn, zm, imm);
|
||||
}
|
||||
break;
|
||||
}
|
||||
return new Unknown64(machInst);
|
||||
|
||||
} // decodeSveMultiplyAccIndexed
|
||||
|
||||
StaticInstPtr
|
||||
decodeSveIntMatMulAdd(ExtMachInst machInst)
|
||||
{
|
||||
@@ -3920,6 +3973,11 @@ namespace Aarch64
|
||||
return decodeSveIntegerDotProductIndexed(machInst);
|
||||
case 0b11:
|
||||
return decodeSveMixedSignDotProductIndexed(machInst);
|
||||
|
||||
// for mla/s indexed , can be renamed
|
||||
case 0b01:
|
||||
return decodeSveMultiplyAccIndexed(machInst);
|
||||
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
|
||||
@@ -2096,6 +2096,34 @@ let {{
|
||||
'class_name' : 'Sve' + Name}
|
||||
exec_output += SveOpExecDeclare.subst(substDict)
|
||||
|
||||
# Generates definitions for ternary SVE instructions (indexed)
def sveTerInstIndexed(name, Name, opClass, types, op, decoder='Generic'):
    """Emit the declaration and execute code for an indexed ternary op.

    name    -- instruction mnemonic passed to ArmInstObjParams
    Name    -- capitalized stem; the generated class is 'Sve2' + Name + 'i'
    opClass -- op class string placed in the instruction parameters
    types   -- element types to instantiate the execute template for
    op      -- C++ statement(s) combining srcElem1, srcElem2 and destElem
    decoder -- unused here; kept for signature parity with sibling helpers
    """
    global header_output, exec_output, decoders
    # The indexed element of op2 is selected independently within every
    # 128-bit segment of the vector. sizeof(Element) is in bytes, so a
    # segment holds 16 / sizeof(Element) elements (not 128 / sizeof, which
    # would always pick from segment 0 on vectors longer than 128 bits).
    code = sveEnabledCheckCode + '''
        unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                xc->tcBase());
        // Number of elements in a 128-bit segment.
        constexpr unsigned ePerSegment = 16 / sizeof(Element);
        for (unsigned i = 0; i < eCount; i++) {
            int segbase = i - i % ePerSegment;
            int s = segbase + imm;
            const Element& srcElem1 = AA64FpOp1_x[i];
            const Element& srcElem2 = AA64FpOp2_x[s];
            Element destElem = AA64FpDestMerge_x[i];
        '''

    code += f"{op} \n"

    code += '''            AA64FpDest_x[i] = destElem;
        }'''
    className = 'Sve2' + Name + 'i'
    iop = ArmInstObjParams(name, className, 'SveTerIndexedOp',
                           {'code': code, 'op_class': opClass}, [])
    header_output += SveTerIndexedOpDeclare.subst(iop)
    exec_output += SveOpExecute.subst(iop)
    # One explicit template instantiation per supported element type.
    for type in types:
        substDict = {'targs' : type,
                     'class_name' : className}
        exec_output += SveOpExecDeclare.subst(substDict)
|
||||
|
||||
# Generates definitions for ternary SVE intructions (always predicated -
|
||||
# merging)
|
||||
def sveTerInst(name, Name, opClass, types, op, decoder='Generic'):
|
||||
@@ -3094,6 +3122,9 @@ let {{
|
||||
substDict = {'targs': type, 'class_name': 'Sve' + Name}
|
||||
exec_output += SveOpExecDeclare.subst(substDict)
|
||||
|
||||
|
||||
|
||||
|
||||
# Generate definition for DOT instructions
|
||||
def sveDotInst(name, Name, opClass, types, isIndexed = True):
|
||||
global header_output, exec_output, decoders
|
||||
@@ -4244,9 +4275,15 @@ let {{
|
||||
# MLA
|
||||
mlaCode = 'destElem += srcElem1 * srcElem2;'
|
||||
sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode)
|
||||
#indexed
|
||||
sveTerInstIndexed('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode)
|
||||
|
||||
# MLS
|
||||
mlsCode = 'destElem -= srcElem1 * srcElem2;'
|
||||
sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
|
||||
#indexed
|
||||
sveTerInstIndexed('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
|
||||
|
||||
# ADCLT
|
||||
adcltCode = 'res = srcElem1 + srcElem2 + carryIn;'
|
||||
sveTerInstUnpred('adclt', 'Adclt', 'VectorIntegerArithOp', unsignedTypes,
|
||||
|
||||
@@ -515,6 +515,44 @@ class %(class_name)s : public %(base_class)s
|
||||
};
|
||||
}};
|
||||
|
||||
|
||||
|
||||
def template SveTerIndexedOpDeclare {{
/*
  Declaration for indexed ternary SVE instructions (e.g. indexed MLA/MLS).
  All three operands are vector registers; the immediate carries the
  element index.
*/
template <class _Element>
class %(class_name)s : public %(base_class)s
{
  private:
    %(reg_idx_arr_decl)s;

  protected:
    using Element = _Element;
    using TPElem = _Element;

  public:
    // Constructor: forwards the operands to the base class and records
    // the element size in bytes.
    %(class_name)s(ExtMachInst machInst, RegIndex _dest,
                   RegIndex _op1, RegIndex _op2, uint16_t _imm) :
        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
                       _dest, _op1, _op2, _imm)
    {
        %(set_reg_idx_arr)s;
        %(constructor)s;
        esize = sizeof(Element);
    }

    Fault execute(ExecContext *, trace::InstRecord *) const override;
};
}};
|
||||
|
||||
|
||||
def template SveMatMulOpDeclare {{
|
||||
template <typename DestElement,
|
||||
typename SrcElementA,
|
||||
@@ -568,6 +606,8 @@ class %(class_name)s : public %(base_class)s
|
||||
};
|
||||
}};
|
||||
|
||||
|
||||
|
||||
def template SveReducOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
|
||||
Reference in New Issue
Block a user