From 72e4f614a2ecf81fa4053d973805276da451f539 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Wed, 3 Aug 2022 14:54:04 +0100 Subject: [PATCH] arch-arm: Add interfaces to set and get SME vector length We add interfaces which roughly mirror those already present for manipulating the SVE vector lengths to set/get the SME vector length. In the case of the SME vector length we also need to do some checking to ensure that the vector length itself is aligned to a whole power of two (one of the SME requirements). Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289 Change-Id: Ib89a4804466f5445adea6de8d65df512e366d618 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64336 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Reviewed-by: Giacomo Travaglini Tested-by: kokoro --- src/arch/arm/decoder.cc | 4 ++ src/arch/arm/decoder.hh | 12 ++++++ src/arch/arm/insts/static_inst.cc | 8 ++++ src/arch/arm/insts/static_inst.hh | 15 +++++++ src/arch/arm/isa.cc | 67 +++++++++++++++++++++++++++++-- src/arch/arm/isa.hh | 4 ++ 6 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc index c315ecfefb..9fc4be0e9a 100644 --- a/src/arch/arm/decoder.cc +++ b/src/arch/arm/decoder.cc @@ -67,6 +67,10 @@ Decoder::Decoder(const ArmDecoderParams &params) sveLen = (safe_cast<ISA *>(params.isa)-> getCurSveVecLenInBitsAtReset() >> 7) - 1; + // Initialize SME vector length + smeLen = (safe_cast<ISA *>(params.isa) + ->getCurSmeVecLenInBitsAtReset() >> 7) - 1; + if (dvmEnabled) { warn_once( "DVM Ops instructions are micro-architecturally " diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh index 8e486a3458..83690936c0 100644 --- a/src/arch/arm/decoder.hh +++ b/src/arch/arm/decoder.hh @@ -85,6 +85,12 @@ class Decoder : public InstDecoder */ int sveLen; + /** + * SME vector length, encoded in the same format as the SMCR_EL<x>.LEN + * bitfields. 
+ */ + int smeLen; + enums::DecoderFlavor decoderFlavor; /// A cache of decoded instruction objects. @@ -158,6 +164,12 @@ class Decoder : public InstDecoder { sveLen = len; } + + void + setSmeLen(uint8_t len) + { + smeLen = len; + } }; } // namespace ArmISA diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc index c07fb3922a..446f2afd18 100644 --- a/src/arch/arm/insts/static_inst.cc +++ b/src/arch/arm/insts/static_inst.cc @@ -1233,5 +1233,13 @@ ArmStaticInst::getCurSveVecLenInBits(ThreadContext *tc) return isa->getCurSveVecLenInBits(); } +unsigned +ArmStaticInst::getCurSmeVecLenInBits(ThreadContext *tc) +{ + auto *isa = static_cast<ArmISA::ISA *>(tc->getIsaPtr()); + return isa->getCurSmeVecLenInBits(); +} + + } // namespace ArmISA } // namespace gem5 diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh index fa58f98de9..3b67e6b253 100644 --- a/src/arch/arm/insts/static_inst.hh +++ b/src/arch/arm/insts/static_inst.hh @@ -583,6 +583,21 @@ class ArmStaticInst : public StaticInst return getCurSveVecLenInBits(tc) / (8 * sizeof(T)); } + static unsigned getCurSmeVecLenInBits(ThreadContext *tc); + + static unsigned + getCurSmeVecLenInQWords(ThreadContext *tc) + { + return getCurSmeVecLenInBits(tc) >> 6; + } + + template <typename T> + static unsigned + getCurSmeVecLen(ThreadContext *tc) + { + return getCurSmeVecLenInBits(tc) / (8 * sizeof(T)); + } + inline Fault undefined(bool disabled=false) const { diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc index 78a1f4fc9f..aec824387b 100644 --- a/src/arch/arm/isa.cc +++ b/src/arch/arm/isa.cc @@ -1162,6 +1162,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val) tc->getDecoderPtr()->as<Decoder>().setSveLen( (getCurSveVecLenInBits() >> 7) - 1); + tc->getDecoderPtr()->as<Decoder>().setSmeLen( + (getCurSmeVecLenInBits() >> 7) - 1); // Follow slightly different semantics if a CheckerCPU object // is connected @@ -2069,11 +2071,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val) case MISCREG_SMCR_EL2: case MISCREG_SMCR_EL1: 
// Set the value here as we need to update the regs before - // reading them back in getCurSmeVecLenInBits (not - // implemented yet) to avoid setting stale vector lengths in - // the decoder. + // reading them back in getCurSmeVecLenInBits to avoid + // setting stale vector lengths in the decoder. setMiscRegNoEffect(idx, newVal); - // TODO: set the SME vector length + tc->getDecoderPtr()->as<Decoder>().setSmeLen( + (getCurSmeVecLenInBits() >> 7) - 1); return; } setMiscRegNoEffect(idx, newVal); @@ -2161,6 +2163,13 @@ ISA::currEL() const unsigned ISA::getCurSveVecLenInBits() const { + SVCR svcr = miscRegs[MISCREG_SVCR]; + // If we are in Streaming Mode, we should return the Streaming Mode vector + // length instead. + if (svcr.sm) { + return getCurSmeVecLenInBits(); + } + if (!FullSystem) { return sveVL * 128; } @@ -2202,6 +2211,56 @@ ISA::getCurSveVecLenInBits() const return (len + 1) * 128; } +unsigned +ISA::getCurSmeVecLenInBits() const +{ + if (!FullSystem) { + return smeVL * 128; + } + + panic_if(!tc, + "A ThreadContext is needed to determine the SME vector length " + "in full-system mode"); + + CPSR cpsr = miscRegs[MISCREG_CPSR]; + ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el; + + unsigned len = 0; + + if (el == EL1 || (el == EL0 && !ELIsInHost(tc, el))) { + len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL1]).len; + } + + if (el == EL2 || (el == EL0 && ELIsInHost(tc, el))) { + len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len; + } else if (release->has(ArmExtension::VIRTUALIZATION) && !isSecure(tc) && + (el == EL0 || el == EL1)) { + len = std::min( + len, + static_cast<unsigned>( + static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len)); + } + + if (el == EL3) { + len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len; + } else if (release->has(ArmExtension::SECURITY)) { + len = std::min( + len, + static_cast<unsigned>( + static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len)); + } + + len = std::min(len, smeVL - 1); + + // len + 1 must be a power of 2! Round down to the nearest whole power of + // two. 
+ static const unsigned LUT[16] = {0, 1, 1, 3, 3, 3, 3, 7, + 7, 7, 7, 7, 7, 7, 7, 15}; + len = LUT[len]; + + return (len + 1) * 128; +} + void ISA::serialize(CheckpointOut &cp) const { diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh index bc0ab7683e..512799feed 100644 --- a/src/arch/arm/isa.hh +++ b/src/arch/arm/isa.hh @@ -367,6 +367,10 @@ namespace ArmISA unsigned getCurSveVecLenInBitsAtReset() const { return sveVL * 128; } + unsigned getCurSmeVecLenInBits() const; + + unsigned getCurSmeVecLenInBitsAtReset() const { return smeVL * 128; } + template <typename Elem> static void zeroSveVecRegUpperPart(Elem *v, unsigned eCount)