From 35ccd7f90764f7d4db59efd66bbbe4c1e48496d7 Mon Sep 17 00:00:00 2001 From: Nitesh Narayana Date: Fri, 24 Nov 2023 15:20:30 +0100 Subject: [PATCH 1/3] arch-arm: This commit adds the mla/s indexed versions This includes the isa and instruction implementations of mla and mls indexed versions from ARM SVE2 ISA spec. Change-Id: I4fbd0382f23d8611e46411f74dc991f5a211a313 --- src/arch/arm/insts/sve.cc | 20 ++++++++ src/arch/arm/insts/sve.hh | 21 ++++++++ src/arch/arm/isa/formats/sve_2nd_level.isa | 58 ++++++++++++++++++++++ src/arch/arm/isa/insts/sve.isa | 37 ++++++++++++++ src/arch/arm/isa/templates/sve.isa | 40 +++++++++++++++ 5 files changed, 176 insertions(+) diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc index b0512817a8..240a7fb116 100644 --- a/src/arch/arm/insts/sve.cc +++ b/src/arch/arm/insts/sve.cc @@ -435,6 +435,26 @@ SveTerPredOp::generateDisassembly( return ss.str(); } + +std::string +SveTerIndexedOp::generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss, "", false); + printVecReg(ss, dest, true); + ccprintf(ss, ", "); + printVecReg(ss, op1, true); + ccprintf(ss, ", "); + printVecReg(ss, op2, true); + ccprintf(ss, "["); + ccprintf(ss, "%lu", imm); + ccprintf(ss, "]"); + return ss.str(); +} + + + std::string SveTerUnpredOp::generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh index dc18ff30a7..c23a975c8b 100644 --- a/src/arch/arm/insts/sve.hh +++ b/src/arch/arm/insts/sve.hh @@ -498,6 +498,27 @@ class SveTerPredOp : public ArmStaticInst Addr pc, const loader::SymbolTable *symtab) const override; }; +/// Ternary, destructive, unpredicated , !INDEXED! 
SVE Instruction +class SveTerIndexedOp : public ArmStaticInst +{ + protected: + RegIndex dest, op1, op2; + uint16_t imm; + uint8_t esize; + + SveTerIndexedOp(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, RegIndex _dest, + RegIndex _op1, RegIndex _op2, uint16_t _imm) : + ArmStaticInst(mnem, _machInst, __opClass), + dest(_dest), op1(_op1), op2(_op2) , imm(_imm) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + + + /// Ternary, destructive, unpredicated SVE instruction. class SveTerUnpredOp : public ArmStaticInst { diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 86c174d7c4..83c811cafd 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -245,6 +245,59 @@ namespace Aarch64 return new Unknown64(machInst); } // decodeSveIntMulAdd + StaticInstPtr + decodeSveMultiplyAccIndexed(ExtMachInst machInst) + { + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm ; + + //= (RegIndex) (uint8_t) bits(machInst, 19, 16); + + uint8_t size = bits(machInst, 23, 22); + uint16_t imm; + uint8_t opc = (bits(machInst, 10)); + + switch(size) { + case 0b00: + case 0b01: + zm = (RegIndex)(uint8_t)bits(machInst, 18, 16); + imm = (uint16_t)(bits(machInst, 22) << 2) + | bits(machInst, 20, 19); + switch(opc) { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm, imm); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm, imm); + } + break; + + case 0b10: + zm = (RegIndex)(uint8_t)bits(machInst, 18, 16); + imm = (uint16_t)bits(machInst, 20, 19); + switch(opc) { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm, imm); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm, imm); + } + break; + + case 0b11: + zm = (RegIndex)(uint8_t)bits(machInst, 19, 16); + imm = (uint16_t)bits(machInst, 20); + switch(opc) { + case 0x0: 
return new Sve2Mlai( + machInst, zda, zn, zm, imm); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm, imm); + } + break; + } + return new Unknown64(machInst); + + } // decodeSveMultiplyAccIndexed + StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst) { @@ -3920,6 +3973,11 @@ namespace Aarch64 return decodeSveIntegerDotProductIndexed(machInst); case 0b11: return decodeSveMixedSignDotProductIndexed(machInst); + + // for mla/s indexed , can be renamed + case 0b01: + return decodeSveMultiplyAccIndexed(machInst); + default: return new Unknown64(machInst); } diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 9999843b59..e222f97f6f 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -2096,6 +2096,34 @@ let {{ 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) + # Generates definitions for ternary SVE instructions (indexed) + def sveTerInstIndexed(name, Name, opClass, types, op, decoder='Generic'): + global header_output, exec_output, decoders + code = sveEnabledCheckCode + ''' + unsigned eCount = ArmStaticInst::getCurSveVecLen( + xc->tcBase()); + for (unsigned i = 0; i < eCount; i++) { + int segbase = i - i % (128 / sizeof(Element)); + int s = segbase + imm; + const Element& srcElem1 = AA64FpOp1_x[i]; + const Element& srcElem2 = AA64FpOp2_x[s]; + Element destElem = AA64FpDestMerge_x[i]; + ''' + + code += f"{op} \n" + + + code += ''' AA64FpDest_x[i] = destElem; + }''' + iop = ArmInstObjParams(name, 'Sve2' + Name+ 'i', 'SveTerIndexedOp', + {'code': code, 'op_class': opClass}, []) + header_output += SveTerIndexedOpDeclare.subst(iop) + exec_output += SveOpExecute.subst(iop) + for type in types: + substDict = {'targs' : type, + 'class_name' : 'Sve2' + Name + "i"} + exec_output += SveOpExecDeclare.subst(substDict) + # Generates definitions for ternary SVE intructions (always predicated - # merging) def sveTerInst(name, Name, opClass, types, op, decoder='Generic'): @@ -3094,6 
+3122,9 @@ let {{ substDict = {'targs': type, 'class_name': 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) + + + # Generate definition for DOT instructions def sveDotInst(name, Name, opClass, types, isIndexed = True): global header_output, exec_output, decoders @@ -4244,9 +4275,15 @@ let {{ # MLA mlaCode = 'destElem += srcElem1 * srcElem2;' sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode) + #indexed + sveTerInstIndexed('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode) + # MLS mlsCode = 'destElem -= srcElem1 * srcElem2;' sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode) + #indexed + sveTerInstIndexed('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode) + # ADCLT adcltCode = 'res = srcElem1 + srcElem2 + carryIn;' sveTerInstUnpred('adclt', 'Adclt', 'VectorIntegerArithOp', unsignedTypes, diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index 813bda029d..b254b1b43d 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -515,6 +515,44 @@ class %(class_name)s : public %(base_class)s }; }}; + + +def template SveTerIndexedOpDeclare {{ + /* + For mla indexed version as it is not included in gem5 right now. + Using ternary ops but all ops are vector regs. + index is the imm here. 
(name can be changed) +*/ +template +class %(class_name)s : public %(base_class)s +{ + //static_assert(sizeof(_SElementA) == sizeof(_SElementB), + // "Source elements must have the same size."); + + private: + %(reg_idx_arr_decl)s; + + protected: + typedef _Element Element; + typedef _Element TPElem; + + public: + // Constructor + %(class_name)s(ExtMachInst machInst, RegIndex _dest, + RegIndex _op1, RegIndex _op2, uint16_t _imm) : + %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dest, _op1, _op2,_imm) + { + %(set_reg_idx_arr)s; + %(constructor)s; + esize = sizeof(Element); + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; +}; +}}; + + def template SveMatMulOpDeclare {{ template class %(class_name)s : public %(base_class)s From db8e1652e8f0c39a55fa2c74a83d8637086337d6 Mon Sep 17 00:00:00 2001 From: Nitesh Narayana Date: Tue, 5 Dec 2023 23:40:06 +0100 Subject: [PATCH 2/3] arch-arm: This commit uses existing template code for mla/s index This includes mla/s index version implementation using the existing template code to avoid code repetition. 
Change-Id: If1de84e01dec638e206c979ca832308ebc904212 --- src/arch/arm/insts/sve.cc | 20 --------- src/arch/arm/insts/sve.hh | 21 ---------- src/arch/arm/isa/formats/sve_2nd_level.isa | 48 ++++++++++++---------- src/arch/arm/isa/insts/sve.isa | 32 +-------------- src/arch/arm/isa/templates/sve.isa | 34 --------------- 5 files changed, 29 insertions(+), 126 deletions(-) diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc index 240a7fb116..b0512817a8 100644 --- a/src/arch/arm/insts/sve.cc +++ b/src/arch/arm/insts/sve.cc @@ -435,26 +435,6 @@ SveTerPredOp::generateDisassembly( return ss.str(); } - -std::string -SveTerIndexedOp::generateDisassembly( - Addr pc, const loader::SymbolTable *symtab) const -{ - std::stringstream ss; - printMnemonic(ss, "", false); - printVecReg(ss, dest, true); - ccprintf(ss, ", "); - printVecReg(ss, op1, true); - ccprintf(ss, ", "); - printVecReg(ss, op2, true); - ccprintf(ss, "["); - ccprintf(ss, "%lu", imm); - ccprintf(ss, "]"); - return ss.str(); -} - - - std::string SveTerUnpredOp::generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh index c23a975c8b..dc18ff30a7 100644 --- a/src/arch/arm/insts/sve.hh +++ b/src/arch/arm/insts/sve.hh @@ -498,27 +498,6 @@ class SveTerPredOp : public ArmStaticInst Addr pc, const loader::SymbolTable *symtab) const override; }; -/// Ternary, destructive, unpredicated , !INDEXED! 
SVE Instruction -class SveTerIndexedOp : public ArmStaticInst -{ - protected: - RegIndex dest, op1, op2; - uint16_t imm; - uint8_t esize; - - SveTerIndexedOp(const char* mnem, ExtMachInst _machInst, - OpClass __opClass, RegIndex _dest, - RegIndex _op1, RegIndex _op2, uint16_t _imm) : - ArmStaticInst(mnem, _machInst, __opClass), - dest(_dest), op1(_op1), op2(_op2) , imm(_imm) - {} - - std::string generateDisassembly( - Addr pc, const loader::SymbolTable *symtab) const override; -}; - - - /// Ternary, destructive, unpredicated SVE instruction. class SveTerUnpredOp : public ArmStaticInst { diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 83c811cafd..dce4f9e998 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -250,50 +250,56 @@ namespace Aarch64 { RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); - RegIndex zm ; - - //= (RegIndex) (uint8_t) bits(machInst, 19, 16); - uint8_t size = bits(machInst, 23, 22); - uint16_t imm; uint8_t opc = (bits(machInst, 10)); switch(size) { case 0b00: case 0b01: - zm = (RegIndex)(uint8_t)bits(machInst, 18, 16); - imm = (uint16_t)(bits(machInst, 22) << 2) + { + + RegIndex zm_16 = (RegIndex)(uint8_t)bits(machInst, 18, 16); + uint8_t imm_16 = (uint8_t)(bits(machInst, 22) << 2) | bits(machInst, 20, 19); - switch(opc) { - case 0x0: return new Sve2Mlai( - machInst, zda, zn, zm, imm); - case 0x1: return new Sve2Mlsi( - machInst, zda, zn, zm, imm); - } + switch(opc) + { + case 0x0: return new Sve2Mlai( + machInst, zda, zn, zm_16, imm_16); + case 0x1: return new Sve2Mlsi( + machInst, zda, zn, zm_16, imm_16); + } + } break; case 0b10: - zm = (RegIndex)(uint8_t)bits(machInst, 18, 16); - imm = (uint16_t)bits(machInst, 20, 19); + { + + RegIndex zm_32 = (RegIndex)(uint8_t)bits(machInst, 18, 16); + uint8_t imm_32 = (uint8_t)bits(machInst, 20, 19); switch(opc) { case 0x0: 
return new Sve2Mlai( - machInst, zda, zn, zm, imm); + machInst, zda, zn, zm_32, imm_32); case 0x1: return new Sve2Mlsi( - machInst, zda, zn, zm, imm); + machInst, zda, zn, zm_32, imm_32); } + } break; case 0b11: - zm = (RegIndex)(uint8_t)bits(machInst, 19, 16); - imm = (uint16_t)bits(machInst, 20); + { + + RegIndex zm_64 = (RegIndex)(uint8_t)bits(machInst, 19, 16); + uint8_t imm_64 = (uint8_t)bits(machInst, 20); switch(opc) { case 0x0: return new Sve2Mlai( - machInst, zda, zn, zm, imm); + machInst, zda, zn, zm_64, imm_64); case 0x1: return new Sve2Mlsi( - machInst, zda, zn, zm, imm); + machInst, zda, zn, zm_64, imm_64); } + } break; } + return new Unknown64(machInst); } // decodeSveMultiplyAccIndexed diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index e222f97f6f..e206106c61 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -2096,34 +2096,6 @@ let {{ 'class_name' : 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) - # Generates definitions for ternary SVE instructions (indexed) - def sveTerInstIndexed(name, Name, opClass, types, op, decoder='Generic'): - global header_output, exec_output, decoders - code = sveEnabledCheckCode + ''' - unsigned eCount = ArmStaticInst::getCurSveVecLen( - xc->tcBase()); - for (unsigned i = 0; i < eCount; i++) { - int segbase = i - i % (128 / sizeof(Element)); - int s = segbase + imm; - const Element& srcElem1 = AA64FpOp1_x[i]; - const Element& srcElem2 = AA64FpOp2_x[s]; - Element destElem = AA64FpDestMerge_x[i]; - ''' - - code += f"{op} \n" - - - code += ''' AA64FpDest_x[i] = destElem; - }''' - iop = ArmInstObjParams(name, 'Sve2' + Name+ 'i', 'SveTerIndexedOp', - {'code': code, 'op_class': opClass}, []) - header_output += SveTerIndexedOpDeclare.subst(iop) - exec_output += SveOpExecute.subst(iop) - for type in types: - substDict = {'targs' : type, - 'class_name' : 'Sve2' + Name + "i"} - exec_output += SveOpExecDeclare.subst(substDict) - # Generates definitions 
for ternary SVE intructions (always predicated - # merging) def sveTerInst(name, Name, opClass, types, op, decoder='Generic'): @@ -4276,13 +4248,13 @@ let {{ mlaCode = 'destElem += srcElem1 * srcElem2;' sveTerInst('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode) #indexed - sveTerInstIndexed('mla', 'Mla', 'SimdMultAccOp', signedTypes, mlaCode) + sveTerIdxInst('mla', '2Mlai', 'SimdMultAccOp', signedTypes, mlaCode) # MLS mlsCode = 'destElem -= srcElem1 * srcElem2;' sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode) #indexed - sveTerInstIndexed('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode) + sveTerIdxInst('mls', '2Mlsi', 'SimdMultAccOp', signedTypes, mlsCode) # ADCLT adcltCode = 'res = srcElem1 + srcElem2 + carryIn;' diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index b254b1b43d..ccca96022c 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -517,40 +517,6 @@ class %(class_name)s : public %(base_class)s -def template SveTerIndexedOpDeclare {{ - /* - For mla indexed version as it is not included in gem5 right now. - Using ternary ops but all ops are vector regs. - index is the imm here. 
(name can be changed) -*/ -template -class %(class_name)s : public %(base_class)s -{ - //static_assert(sizeof(_SElementA) == sizeof(_SElementB), - // "Source elements must have the same size."); - - private: - %(reg_idx_arr_decl)s; - - protected: - typedef _Element Element; - typedef _Element TPElem; - - public: - // Constructor - %(class_name)s(ExtMachInst machInst, RegIndex _dest, - RegIndex _op1, RegIndex _op2, uint16_t _imm) : - %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, - _dest, _op1, _op2,_imm) - { - %(set_reg_idx_arr)s; - %(constructor)s; - esize = sizeof(Element); - } - - Fault execute(ExecContext *, trace::InstRecord *) const override; -}; -}}; def template SveMatMulOpDeclare {{ From d962d2588d2102913d903d151a7de79b3cd2d3be Mon Sep 17 00:00:00 2001 From: Nitesh Narayana Date: Wed, 6 Dec 2023 16:03:21 +0100 Subject: [PATCH 3/3] arch-arm: This commit cleans .isa files This commit cleans extra new lines from .isa files from this branch Change-Id: I4087ed230aa041747038b49360c2aba3f82c0790 --- src/arch/arm/isa/insts/sve.isa | 3 --- src/arch/arm/isa/templates/sve.isa | 6 ------ 2 files changed, 9 deletions(-) diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index e206106c61..e0e44cdf3d 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -3094,9 +3094,6 @@ let {{ substDict = {'targs': type, 'class_name': 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) - - - # Generate definition for DOT instructions def sveDotInst(name, Name, opClass, types, isIndexed = True): global header_output, exec_output, decoders diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index ccca96022c..813bda029d 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -515,10 +515,6 @@ class %(class_name)s : public %(base_class)s }; }}; - - - - def template SveMatMulOpDeclare {{ template class %(class_name)s : public %(base_class)s