diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa index 0d12a226e5..86c174d7c4 100644 --- a/src/arch/arm/isa/formats/sve_2nd_level.isa +++ b/src/arch/arm/isa/formats/sve_2nd_level.isa @@ -2256,19 +2256,19 @@ namespace Aarch64 uint8_t usig = (uint8_t) bits(machInst, 10); if (size & 0x1) { if (usig) { - return new SveUdotv(machInst, - zda, zn, zm); + return new SveUdotv + (machInst, zda, zn, zm); } else { - return new SveSdotv(machInst, - zda, zn, zm); + return new SveSdotv + (machInst, zda, zn, zm); } } else { if (usig) { - return new SveUdotv(machInst, - zda, zn, zm); + return new SveUdotv + (machInst, zda, zn, zm); } else { - return new SveSdotv(machInst, - zda, zn, zm); + return new SveSdotv + (machInst, zda, zn, zm); } } @@ -2292,21 +2292,21 @@ namespace Aarch64 RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16); uint8_t i1 = (uint8_t) bits(machInst, 20); if (usig) { - return new SveUdoti(machInst, - zda, zn, zm, i1); + return new SveUdoti + (machInst, zda, zn, zm, i1); } else { - return new SveSdoti(machInst, - zda, zn, zm, i1); + return new SveSdoti + (machInst, zda, zn, zm, i1); } } else { RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); uint8_t i2 = (uint8_t) bits(machInst, 20, 19); if (usig) { - return new SveUdoti(machInst, - zda, zn, zm, i2); + return new SveUdoti + (machInst, zda, zn, zm, i2); } else { - return new SveSdoti(machInst, - zda, zn, zm, i2); + return new SveSdoti + (machInst, zda, zn, zm, i2); } } return new Unknown64(machInst); @@ -2320,16 +2320,12 @@ namespace Aarch64 return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 20, 16); + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16); - // Placeholder for SveUsdotv - //return SveUsdotv(machInst, zda, zn, zm); - return new Unknown64(machInst); + return new SveUsdotv + (machInst, zda, zn, zm); } // decodeSveMixedSignDotProduct StaticInstPtr @@ -2340,26 +2336,18 @@ namespace Aarch64 return new Unknown64(machInst); } - RegIndex zda M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 4, 0); - RegIndex zn M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 9, 5); - RegIndex zm M5_VAR_USED = (RegIndex) - (uint8_t) bits(machInst, 18, 16); - uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19); - + RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0); + RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5); + RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16); + uint8_t i2 = (uint8_t) bits(machInst, 20, 19); uint8_t usig = (uint8_t) bits(machInst, 10); if (usig) { - // Placeholder for SveSudoti - //return SveSudoti - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveSudoti + (machInst, zda, zn, zm, i2); } else { - // Placeholder for SveUsdoti - //return SveUsdoti - // (machInst, zda, zn, zm, i2); - return new Unknown64(machInst); + return new SveUsdoti + (machInst, zda, zn, zm, i2); } } // decodeSveMixedSignDotProductIndexed diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa index 6e8313bda5..e7a773e3df 100644 --- a/src/arch/arm/isa/insts/sve.isa +++ b/src/arch/arm/isa/insts/sve.isa @@ -3098,6 +3098,15 @@ let {{ def sveDotInst(name, Name, opClass, types, isIndexed = True): global header_output, exec_output, decoders code = sveEnabledCheckCode + ''' + // Types of the extended versions of the source elements. + // Required to make sure the intermediate calculations don't overflow. + using ExtendedElementA = typename vector_element_traits:: + extend_element::type; + using ExtendedElementB = typename vector_element_traits:: + extend_element::type; + unsigned eCount = ArmStaticInst::getCurSveVecLen( xc->tcBase()); for (int i = 0; i < eCount; ++i) {''' @@ -3107,17 +3116,21 @@ let {{ int s = segbase + imm;''' code += ''' DElement res = AA64FpDest_xd[i]; - DElement srcElem1, srcElem2; + ExtendedElementA srcElemA; + ExtendedElementB srcElemB; for (int j = 0; j <= 3; ++j) { - srcElem1 = static_cast(AA64FpOp1_xs[4 * i + j]);''' + srcElemA = static_cast + (AA64FpOp1_srcA[4 * i + j]);''' if isIndexed: code += ''' - srcElem2 = static_cast(AA64FpOp2_xs[4 * s + j]);''' + srcElemB = static_cast + (AA64FpOp2_srcB[4 * s + j]);''' else: code += ''' - srcElem2 = static_cast(AA64FpOp2_xs[4 * i + j]);''' + srcElemB = static_cast + (AA64FpOp2_srcB[4 * i + j]);''' code += ''' - res += srcElem1 * srcElem2; + res += srcElemA * srcElemB; } AA64FpDestMerge_xd[i] = res; }''' @@ -3129,7 +3142,7 @@ let {{ header_output += SveWideningTerImmOpDeclare.subst(iop) else: header_output += SveWideningTerOpDeclare.subst(iop) - exec_output += SveWideningOpExecute.subst(iop) + exec_output += SveWideningTerOpExecute.subst(iop) for type in types: substDict = {'targs': type, 'class_name': 'Sve' + Name} exec_output += SveOpExecDeclare.subst(substDict) @@ -4468,11 +4481,14 @@ let {{ sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode, PredType.MERGE, True) # SDOT (indexed) - sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = True) + sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = True) # SDOT (vectors) - sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t', - 'int16_t, int64_t'], isIndexed = False) + sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int8_t, int32_t', + 'int16_t, int16_t, int64_t'], isIndexed = False) + # SUDOT (indexed) + sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'], + isIndexed = True) # SEL (predicates) selCode = 'destElem = srcElem1;' svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',), @@ -4857,11 +4873,17 @@ let {{ sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode, PredType.MERGE, True) # UDOT (indexed) - sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = True) + sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = True) # UDOT (vectors) - sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t', - 'uint16_t, uint64_t'], isIndexed = False) + sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t', + 'uint16_t, uint16_t, uint64_t'], isIndexed = False) + # USDOT (indexed) + sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = True) + # USDOT (vectors) + sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'], + isIndexed = False) # UMAX (immediate) sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode) # UMAX (vectors) diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa index 65abb1bcbb..813bda029d 100644 --- a/src/arch/arm/isa/templates/sve.isa +++ b/src/arch/arm/isa/templates/sve.isa @@ -1139,17 +1139,22 @@ class %(class_name)s : public %(base_class)s }}; def template SveWideningTerImmOpDeclare {{ -template +template class %(class_name)s : public %(base_class)s { + static_assert(sizeof(_SElementA) == sizeof(_SElementB), + "Source elements must have the same size."); + private: %(reg_idx_arr_decl)s; protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1168,7 +1173,7 @@ class %(class_name)s : public %(base_class)s }}; def template SveWideningTerOpDeclare {{ -template +template class %(class_name)s : public %(base_class)s { private: @@ -1176,9 +1181,11 @@ class %(class_name)s : public %(base_class)s protected: typedef _DElement Element; - typedef _SElement SElement; + typedef _SElementA SElementA; + typedef _SElementB SElementB; typedef _DElement DElement; - typedef _SElement TPSElem; + typedef _SElementA TPSrcAElem; + typedef _SElementB TPSrcBElem; typedef _DElement TPDElem; public: @@ -1295,6 +1302,26 @@ def template SveWideningOpExecute {{ } }}; +def template SveWideningTerOpExecute {{ + template + Fault %(class_name)s::execute + (ExecContext *xc, + trace::InstRecord *traceData) const + { + Fault fault = NoFault; + %(op_decl)s; + %(op_rd)s; + + %(code)s; + if (fault == NoFault) + { + %(op_wb)s; + } + + return fault; + } +}}; + def template SveNonTemplatedOpExecute {{ Fault %(class_name)s::execute(ExecContext *xc,