From 7580e8d53d3490b3e582637a491d17941962fbbe Mon Sep 17 00:00:00 2001 From: Giacomo Travaglini Date: Fri, 8 Apr 2022 10:30:05 +0100 Subject: [PATCH] arch-arm: Memoize computeAddrTop in the MMU code Profiling gem5 has indicated computeAddrTop as one of the main contributors in AArch64 simulation time The utility function gets used in the critical path of gem5, which is the memory translation subsystem. The function is supposed to compute a rather trivial task: identifying the "real" most significant bit of a virtual address. This turns out to be quite expensive. Why? The main issue is the AArch32/AArch64 check, which uses the ELIs32 helper. This performs a sequential read of several MiscReg values until it confirms that an EL is indeed using AArch32 (or AArch64). This is functionally accurate but it is too expensive for the critical path of a program. This patch is addressing the issue by adding a Memoizer object for the computeAddrTop function to the CachedState data structure, which is already holding cached system register values for performance reasons. Whenever we need to invalidate those sys reg values because of a change in the translation system, we also flush/invalidate the memoizer cache Change-Id: If42e945c650c293ace304fb4c35e709783bb82d4 Signed-off-by: Giacomo Travaglini Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/59151 Reviewed-by: Jason Lowe-Power Tested-by: kokoro --- src/arch/arm/mmu.cc | 34 +++++++++++++++++++++-------- src/arch/arm/mmu.hh | 52 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/arch/arm/mmu.cc b/src/arch/arm/mmu.cc index 79a5240e70..0dde0d1083 100644 --- a/src/arch/arm/mmu.cc +++ b/src/arch/arm/mmu.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013, 2016-2021 Arm Limited + * Copyright (c) 2010-2013, 2016-2022 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -197,6 +197,8 @@ void MMU::invalidateMiscReg() { s1State.miscRegValid = false; + s1State.computeAddrTop.flush(); + s2State.computeAddrTop.flush(); } Fault @@ -237,11 +239,12 @@ MMU::translateSe(const RequestPtr &req, ThreadContext *tc, Mode mode, updateMiscReg(tc, NormalTran, state.isStage2); Addr vaddr_tainted = req->getVaddr(); Addr vaddr = 0; - if (state.aarch64) + if (state.aarch64) { vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL, - (TCR)state.ttbcr, mode==Execute); - else + static_cast(state.ttbcr), mode==Execute, state); + } else { vaddr = vaddr_tainted; + } Request::Flags flags = req->getFlags(); bool is_fetch = (mode == Execute); @@ -480,7 +483,7 @@ MMU::checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode, Addr vaddr_tainted = req->getVaddr(); Addr vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL, - (TCR)state.ttbcr, mode==Execute); + static_cast(state.ttbcr), mode==Execute, state); Request::Flags flags = req->getFlags(); bool is_fetch = (mode == Execute); @@ -789,6 +792,18 @@ MMU::checkPAN(ThreadContext *tc, uint8_t ap, const RequestPtr &req, Mode mode, return false; } +Addr +MMU::purifyTaggedAddr(Addr vaddr_tainted, ThreadContext *tc, ExceptionLevel el, + TCR tcr, bool is_inst, CachedState& state) +{ + const bool selbit = bits(vaddr_tainted, 55); + + // Call the memoized version of computeAddrTop + const auto topbit = state.computeAddrTop(tc, selbit, is_inst, tcr, el); + + return maskTaggedAddr(vaddr_tainted, tc, el, topbit); +} + Fault MMU::translateMmuOff(ThreadContext *tc, const RequestPtr &req, Mode mode, ArmTranslationType tran_type, Addr vaddr, bool long_desc_format, @@ -947,11 +962,12 @@ MMU::translateFs(const RequestPtr &req, ThreadContext *tc, Mode mode, Addr vaddr_tainted = req->getVaddr(); Addr vaddr = 0; - if (state.aarch64) + if (state.aarch64) { vaddr = purifyTaggedAddr(vaddr_tainted, tc, state.aarch64EL, - (TCR)state.ttbcr, mode==Execute); - else + static_cast(state.ttbcr), mode==Execute, state); + } else { vaddr = vaddr_tainted; + } Request::Flags flags = req->getFlags(); bool is_fetch = (mode == Execute); @@ -1443,7 +1459,7 @@ MMU::getTE(TlbEntry **te, const RequestPtr &req, ThreadContext *tc, Mode mode, ExceptionLevel target_el = state.aarch64 ? state.aarch64EL : EL1; if (state.aarch64) { vaddr = purifyTaggedAddr(vaddr_tainted, tc, target_el, - (TCR)state.ttbcr, mode==Execute); + static_cast(state.ttbcr), mode==Execute, state); } else { vaddr = vaddr_tainted; } diff --git a/src/arch/arm/mmu.hh b/src/arch/arm/mmu.hh index fc5b3074fa..171dbf5f82 100644 --- a/src/arch/arm/mmu.hh +++ b/src/arch/arm/mmu.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013, 2016, 2019-2021 Arm Limited + * Copyright (c) 2010-2013, 2016, 2019-2022 Arm Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -43,8 +43,9 @@ #include "arch/arm/page_size.hh" #include "arch/arm/tlb.hh" +#include "arch/arm/utility.hh" #include "arch/generic/mmu.hh" - +#include "base/memoizer.hh" #include "enums/ArmLookupLevel.hh" #include "params/ArmMMU.hh" @@ -130,11 +131,44 @@ class MMU : public BaseMMU S12E1Tran = 0x100 }; - struct CachedState { - explicit CachedState(MMU *_mmu, bool stage2) - : mmu(_mmu), isStage2(stage2) + struct CachedState + { + CachedState(MMU *_mmu, bool stage2) + : mmu(_mmu), isStage2(stage2), + computeAddrTop(ArmISA::computeAddrTop) {} + CachedState& + operator=(const CachedState &rhs) + { + isStage2 = rhs.isStage2; + cpsr = rhs.cpsr; + aarch64 = rhs.aarch64; + aarch64EL = EL0; + sctlr = rhs.sctlr; + scr = rhs.scr; + isPriv = rhs.isPriv; + isSecure = rhs.isSecure; + isHyp = rhs.isHyp; + ttbcr = rhs.ttbcr; + asid = rhs.asid; + vmid = rhs.vmid; + prrr = rhs.prrr; + nmrr = rhs.nmrr; + hcr = rhs.hcr; + dacr = rhs.dacr; + miscRegValid = rhs.miscRegValid; + curTranType = rhs.curTranType; + stage2Req = rhs.stage2Req; + stage2DescReq = rhs.stage2DescReq; + directToStage2 = rhs.directToStage2; + + // When we copy we just flush the memoizer cache + computeAddrTop.flush(); + + return *this; + } + void updateMiscReg(ThreadContext *tc, ArmTranslationType tran_type); /** Returns the current VMID @@ -173,6 +207,9 @@ class MMU : public BaseMMU // Indicates whether all translation requests should // be routed directly to the stage 2 TLB bool directToStage2 = false; + + Memoizer computeAddrTop; }; MMU(const ArmMMUParams &p); @@ -397,7 +434,12 @@ class MMU : public BaseMMU ThreadContext *tc, bool stage2); Fault checkPermissions64(TlbEntry *te, const RequestPtr &req, Mode mode, ThreadContext *tc, CachedState &state); + protected: + Addr purifyTaggedAddr(Addr vaddr_tainted, ThreadContext *tc, + ExceptionLevel el, + TCR tcr, bool is_inst, CachedState& state); + bool checkPAN(ThreadContext *tc, uint8_t ap, const RequestPtr &req, Mode mode, const bool is_priv, CachedState &state);