arch-arm: Memoize computeAddrTop in the MMU code

Profiling gem5 has indicated computeAddrTop as one of the main
contributors in AArch64 simulation time

The utility function gets used in the critical path of gem5, which is
the memory translation subsystem. The function is supposed to compute a
rather trivial task: identifying the "real" most significant bit of a
virtual address.

This turns out to be quite expensive. Why?

The main issue is the AArch32/AArch64 check, which uses the ELIs32
helper. This performs a sequential read of several MiscReg
values until it confirms that an EL is indeed using AArch32 (or
AArch64).

This is functionally accurate but it is too expensive for the critical
path of a program.

This patch is addressing the issue by adding a Memoizer object for the
computeAddrTop function to the CachedState data structure, which is
already holding cached system register values for performance reasons.
Whenever we need to invalidate those sys reg values because of a change
in the translation system, we also flush/invalidate the memoizer cache

Change-Id: If42e945c650c293ace304fb4c35e709783bb82d4
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/59151
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Giacomo Travaglini
2022-04-08 10:30:05 +01:00
parent cc6aa4c173
commit 7580e8d53d
2 changed files with 72 additions and 14 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013, 2016-2021 Arm Limited
* Copyright (c) 2010-2013, 2016-2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -197,6 +197,8 @@ void
MMU::invalidateMiscReg()
{
s1State.miscRegValid = false;
s1State.computeAddrTop.flush();
s2State.computeAddrTop.flush();
}
Fault
@@ -237,11 +239,12 @@ MMU::translateSe(const RequestPtr &req, ThreadContext *tc, Mode mode,
updateMiscReg(tc, NormalTran, state.isStage2);
Addr vaddr_tainted = req->getVaddr();
Addr vaddr = 0;
if (state.aarch64)
if (state.aarch64) {
vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL,
(TCR)state.ttbcr, mode==Execute);
else
static_cast<TCR>(state.ttbcr), mode==Execute, state);
} else {
vaddr = vaddr_tainted;
}
Request::Flags flags = req->getFlags();
bool is_fetch = (mode == Execute);
@@ -480,7 +483,7 @@ MMU::checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode,
Addr vaddr_tainted = req->getVaddr();
Addr vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL,
(TCR)state.ttbcr, mode==Execute);
static_cast<TCR>(state.ttbcr), mode==Execute, state);
Request::Flags flags = req->getFlags();
bool is_fetch = (mode == Execute);
@@ -789,6 +792,18 @@ MMU::checkPAN(ThreadContext *tc, uint8_t ap, const RequestPtr &req, Mode mode,
return false;
}
Addr
MMU::purifyTaggedAddr(Addr vaddr_tainted, ThreadContext *tc, ExceptionLevel el,
TCR tcr, bool is_inst, CachedState& state)
{
const bool selbit = bits(vaddr_tainted, 55);
// Call the memoized version of computeAddrTop
const auto topbit = state.computeAddrTop(tc, selbit, is_inst, tcr, el);
return maskTaggedAddr(vaddr_tainted, tc, el, topbit);
}
Fault
MMU::translateMmuOff(ThreadContext *tc, const RequestPtr &req, Mode mode,
ArmTranslationType tran_type, Addr vaddr, bool long_desc_format,
@@ -947,11 +962,12 @@ MMU::translateFs(const RequestPtr &req, ThreadContext *tc, Mode mode,
Addr vaddr_tainted = req->getVaddr();
Addr vaddr = 0;
if (state.aarch64)
if (state.aarch64) {
vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL,
(TCR)state.ttbcr, mode==Execute);
else
static_cast<TCR>(state.ttbcr), mode==Execute, state);
} else {
vaddr = vaddr_tainted;
}
Request::Flags flags = req->getFlags();
bool is_fetch = (mode == Execute);
@@ -1443,7 +1459,7 @@ MMU::getTE(TlbEntry **te, const RequestPtr &req, ThreadContext *tc, Mode mode,
ExceptionLevel target_el = state.aarch64 ? state.aarch64EL : EL1;
if (state.aarch64) {
vaddr = purifyTaggedAddr(vaddr_tainted, tc, target_el,
(TCR)state.ttbcr, mode==Execute);
static_cast<TCR>(state.ttbcr), mode==Execute, state);
} else {
vaddr = vaddr_tainted;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013, 2016, 2019-2021 Arm Limited
* Copyright (c) 2010-2013, 2016, 2019-2022 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -43,8 +43,9 @@
#include "arch/arm/page_size.hh"
#include "arch/arm/tlb.hh"
#include "arch/arm/utility.hh"
#include "arch/generic/mmu.hh"
#include "base/memoizer.hh"
#include "enums/ArmLookupLevel.hh"
#include "params/ArmMMU.hh"
@@ -130,11 +131,44 @@ class MMU : public BaseMMU
S12E1Tran = 0x100
};
struct CachedState {
explicit CachedState(MMU *_mmu, bool stage2)
: mmu(_mmu), isStage2(stage2)
struct CachedState
{
CachedState(MMU *_mmu, bool stage2)
: mmu(_mmu), isStage2(stage2),
computeAddrTop(ArmISA::computeAddrTop)
{}
CachedState&
operator=(const CachedState &rhs)
{
isStage2 = rhs.isStage2;
cpsr = rhs.cpsr;
aarch64 = rhs.aarch64;
aarch64EL = EL0;
sctlr = rhs.sctlr;
scr = rhs.scr;
isPriv = rhs.isPriv;
isSecure = rhs.isSecure;
isHyp = rhs.isHyp;
ttbcr = rhs.ttbcr;
asid = rhs.asid;
vmid = rhs.vmid;
prrr = rhs.prrr;
nmrr = rhs.nmrr;
hcr = rhs.hcr;
dacr = rhs.dacr;
miscRegValid = rhs.miscRegValid;
curTranType = rhs.curTranType;
stage2Req = rhs.stage2Req;
stage2DescReq = rhs.stage2DescReq;
directToStage2 = rhs.directToStage2;
// When we copy we just flush the memoizer cache
computeAddrTop.flush();
return *this;
}
void updateMiscReg(ThreadContext *tc, ArmTranslationType tran_type);
/** Returns the current VMID
@@ -173,6 +207,9 @@ class MMU : public BaseMMU
// Indicates whether all translation requests should
// be routed directly to the stage 2 TLB
bool directToStage2 = false;
Memoizer<int, ThreadContext*, bool,
bool, TCR, ExceptionLevel> computeAddrTop;
};
MMU(const ArmMMUParams &p);
@@ -397,7 +434,12 @@ class MMU : public BaseMMU
ThreadContext *tc, bool stage2);
Fault checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode,
ThreadContext *tc, CachedState &state);
protected:
Addr purifyTaggedAddr(Addr vaddr_tainted, ThreadContext *tc,
ExceptionLevel el,
TCR tcr, bool is_inst, CachedState& state);
bool checkPAN(ThreadContext *tc, uint8_t ap, const RequestPtr &req,
Mode mode, const bool is_priv, CachedState &state);