arch-arm: Add interfaces to set and get SME vector length

We add interfaces to set/get the SME vector length. These roughly mirror
the interfaces already present for manipulating the SVE vector length.

In the case of the SME vector length we also need to do some checking
to ensure that the vector length itself is aligned to a whole power of
two (one of the SME requirements).

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: Ib89a4804466f5445adea6de8d65df512e366d618
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64336
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Sascha Bischoff
2022-08-03 14:54:04 +01:00
committed by Giacomo Travaglini
parent dfd151d52d
commit 72e4f614a2
6 changed files with 106 additions and 4 deletions

View File

@@ -67,6 +67,10 @@ Decoder::Decoder(const ArmDecoderParams &params)
sveLen = (safe_cast<ISA *>(params.isa)->
getCurSveVecLenInBitsAtReset() >> 7) - 1;
// Initialize SME vector length
smeLen = (safe_cast<ISA *>(params.isa)
->getCurSmeVecLenInBitsAtReset() >> 7) - 1;
if (dvmEnabled) {
warn_once(
"DVM Ops instructions are micro-architecturally "

View File

@@ -85,6 +85,12 @@ class Decoder : public InstDecoder
*/
int sveLen;
/**
 * SME vector length, encoded in the same format as the SMCR_EL<x>.LEN
 * bitfields.
 *
 * The values written here by the constructor and by the ISA are computed
 * as (vector length in bits >> 7) - 1, i.e. the number of 128-bit
 * granules minus one.
 */
int smeLen;
enums::DecoderFlavor decoderFlavor;
/// A cache of decoded instruction objects.
@@ -158,6 +164,12 @@ class Decoder : public InstDecoder
{
sveLen = len;
}
void
setSmeLen(uint8_t len)
{
smeLen = len;
}
};
} // namespace ArmISA

View File

@@ -1233,5 +1233,13 @@ ArmStaticInst::getCurSveVecLenInBits(ThreadContext *tc)
return isa->getCurSveVecLenInBits();
}
/**
 * Look up the current SME vector length, in bits, for a thread context.
 *
 * The ISA pointer held by the thread context is treated as an
 * ArmISA::ISA and the query is forwarded to it.
 *
 * @param tc Thread context whose ISA object is queried.
 * @return Whatever ISA::getCurSmeVecLenInBits() reports.
 */
unsigned
ArmStaticInst::getCurSmeVecLenInBits(ThreadContext *tc)
{
    return static_cast<ArmISA::ISA *>(tc->getIsaPtr())
        ->getCurSmeVecLenInBits();
}
} // namespace ArmISA
} // namespace gem5

View File

@@ -583,6 +583,21 @@ class ArmStaticInst : public StaticInst
return getCurSveVecLenInBits(tc) / (8 * sizeof(T));
}
/**
 * Return the current SME vector length in bits, as reported by the
 * ArmISA::ISA object attached to the given thread context.
 */
static unsigned getCurSmeVecLenInBits(ThreadContext *tc);
/**
 * Current SME vector length expressed in 64-bit units.
 *
 * @param tc Thread context used to look up the length in bits.
 * @return The length in bits divided by 64.
 */
static unsigned
getCurSmeVecLenInQWords(ThreadContext *tc)
{
    const unsigned lenInBits = getCurSmeVecLenInBits(tc);
    return lenInBits / 64;
}
/**
 * Current SME vector length expressed as a number of elements of type T.
 *
 * @tparam T Element type; its size determines the element width.
 * @param tc Thread context used to look up the length in bits.
 * @return The length in bits divided by the width of T in bits.
 */
template<typename T>
static unsigned
getCurSmeVecLen(ThreadContext *tc)
{
    constexpr auto bitsPerElem = 8 * sizeof(T);
    return getCurSmeVecLenInBits(tc) / bitsPerElem;
}
inline Fault
undefined(bool disabled=false) const
{

View File

@@ -1162,6 +1162,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
tc->getDecoderPtr()->as<Decoder>().setSveLen(
(getCurSveVecLenInBits() >> 7) - 1);
tc->getDecoderPtr()->as<Decoder>().setSmeLen(
(getCurSmeVecLenInBits() >> 7) - 1);
// Follow slightly different semantics if a CheckerCPU object
// is connected
@@ -2069,11 +2071,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
case MISCREG_SMCR_EL2:
case MISCREG_SMCR_EL1:
// Set the value here as we need to update the regs before
// reading them back in getCurSmeVecLenInBits (not
// implemented yet) to avoid setting stale vector lengths in
// the decoder.
// reading them back in getCurSmeVecLenInBits to avoid
// setting stale vector lengths in the decoder.
setMiscRegNoEffect(idx, newVal);
// TODO: set the SME vector length
tc->getDecoderPtr()->as<Decoder>().setSmeLen(
(getCurSmeVecLenInBits() >> 7) - 1);
return;
}
setMiscRegNoEffect(idx, newVal);
@@ -2161,6 +2163,13 @@ ISA::currEL() const
unsigned
ISA::getCurSveVecLenInBits() const
{
SVCR svcr = miscRegs[MISCREG_SVCR];
// If we are in Streaming Mode, we should return the Streaming Mode vector
// length instead.
if (svcr.sm) {
return getCurSmeVecLenInBits();
}
if (!FullSystem) {
return sveVL * 128;
}
@@ -2202,6 +2211,56 @@ ISA::getCurSveVecLenInBits() const
return (len + 1) * 128;
}
unsigned
ISA::getCurSmeVecLenInBits() const
{
if (!FullSystem) {
return smeVL * 128;
}
panic_if(!tc,
"A ThreadContext is needed to determine the SME vector length "
"in full-system mode");
CPSR cpsr = miscRegs[MISCREG_CPSR];
ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
unsigned len = 0;
if (el == EL1 || (el == EL0 && !ELIsInHost(tc, el))) {
len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL1]).len;
}
if (el == EL2 || (el == EL0 && ELIsInHost(tc, el))) {
len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len;
} else if (release->has(ArmExtension::VIRTUALIZATION) && !isSecure(tc) &&
(el == EL0 || el == EL1)) {
len = std::min(
len,
static_cast<unsigned>(
static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len));
}
if (el == EL3) {
len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len;
} else if (release->has(ArmExtension::SECURITY)) {
len = std::min(
len,
static_cast<unsigned>(
static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len));
}
len = std::min(len, smeVL - 1);
// len + 1 must be a power of 2! Round down to the nearest whole power of
// two.
static const unsigned LUT[16] = {0, 1, 1, 3, 3, 3, 3, 7,
7, 7, 7, 7, 7, 7, 7, 15};
len = LUT[len];
return (len + 1) * 128;
}
void
ISA::serialize(CheckpointOut &cp) const
{

View File

@@ -367,6 +367,10 @@ namespace ArmISA
/** SVE vector length in bits at reset: sveVL scaled by 128. */
unsigned
getCurSveVecLenInBitsAtReset() const
{
    return sveVL * 128;
}
/**
 * Return the current SME vector length in bits.
 *
 * Outside full-system mode this is smeVL * 128. In full-system mode it is
 * derived from the SMCR_ELx.LEN fields for the current exception level
 * and rounded down so that the length is a power-of-two multiple of
 * 128 bits.
 */
unsigned getCurSmeVecLenInBits() const;
/** SME vector length in bits at reset: smeVL scaled by 128. */
unsigned
getCurSmeVecLenInBitsAtReset() const
{
    return smeVL * 128;
}
template <typename Elem>
static void
zeroSveVecRegUpperPart(Elem *v, unsigned eCount)