From 00187b7bc3ec908ee03e6fc5a3f1408fb91daed4 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 28 Jul 2021 02:16:53 -0700
Subject: [PATCH] x86,mem: Replace the x86 StoreCheck flag with
 READ_MODIFY_WRITE.

X86 had a private/arch specific request flag called StoreCheck which it
used to signal to the TLB that it should fault on a load if it would
have faulted had it been a store. That way, you can detect whether a
read-modify-write type of operation is going to fail due to a
translation problem during the read, and don't have to worry about not
doing anything architecturally visible until the store had succeeded,
while also making sure not to do the store part if the modify part
could fail.

It seems that Ruby had hijacked that flag and had an architecture
specific check which was looking for a load which was going to be
followed by a store. The x86 flag was never intended to communicate
that beyond the TLB, and this nominally architecture agnostic component
shouldn't be reaching into the ISA specific flags to try to get that
information.

Instead, this change introduces a new Request flag called
READ_MODIFY_WRITE which is used for the same purpose in x86, but in
general means that a load will be followed by a write in the near
future. With this new globally applicable flag, the ruby Sequencer
class no longer needs to check what the arch is, nor does it need to
access ISA private data in the request flags.

Always doing this check should be no less efficient than before,
because checking the arch involved calling into the system object,
while checking the flag only requires masking a bit on the flags which
the compiler probably already has floating around for other logic in
this function.

Change-Id: Ied5b744d31e7aa8bf25e399b6b321f9d2020a92f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48710
Tested-by: kokoro
Reviewed-by: Bobby R.
Bruce Maintainer: Gabe Black --- src/arch/x86/isa/microops/ldstop.isa | 8 ++++---- src/arch/x86/ldstflags.hh | 1 - src/arch/x86/tlb.cc | 2 +- src/gpu-compute/gpu_tlb.cc | 4 ++-- src/mem/request.hh | 16 +++++++++++++--- src/mem/ruby/system/Sequencer.cc | 9 +-------- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 4fb953acca..3c3b7e4847 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -518,10 +518,10 @@ let {{ implicitStack=True) defineMicroLoadOp('Ldst', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', - '(StoreCheck << FlagShift)') + 'Request::READ_MODIFY_WRITE') defineMicroLoadOp('Ldstl', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', - '(StoreCheck << FlagShift) | Request::LOCKED_RMW', + 'Request::READ_MODIFY_WRITE | Request::LOCKED_RMW', nonSpec=True) defineMicroLoadOp('Ldfp', code='FpData_uqw = Mem', big=False, @@ -599,10 +599,10 @@ let {{ ''' defineMicroLoadSplitOp('LdSplit', code, - '(StoreCheck << FlagShift)') + 'Request::READ_MODIFY_WRITE') defineMicroLoadSplitOp('LdSplitl', code, - '(StoreCheck << FlagShift) | Request::LOCKED_RMW', + 'Request::READ_MODIFY_WRITE | Request::LOCKED_RMW', nonSpec=True) def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0", diff --git a/src/arch/x86/ldstflags.hh b/src/arch/x86/ldstflags.hh index ab6fa2c763..7465d570ea 100644 --- a/src/arch/x86/ldstflags.hh +++ b/src/arch/x86/ldstflags.hh @@ -56,7 +56,6 @@ namespace X86ISA { CPL0FlagBit = 1, AddrSizeFlagBit = 2, - StoreCheck = 4 }; } // namespace X86ISA } // namespace gem5 diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 00e1120f4e..9c1a9dae5f 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -313,7 +313,7 @@ TLB::translate(const RequestPtr &req, { Request::Flags flags = req->getFlags(); int seg = flags & SegmentFlagMask; - bool 
storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; delayedResponse = false; diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc index 9560b4de10..e2225a0ffd 100644 --- a/src/gpu-compute/gpu_tlb.cc +++ b/src/gpu-compute/gpu_tlb.cc @@ -424,7 +424,7 @@ namespace X86ISA { uint32_t flags = req->getFlags(); int seg = flags & SegmentFlagMask; - bool storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; // If this is true, we're dealing with a request // to a non-memory address space. @@ -764,7 +764,7 @@ namespace X86ISA { HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); uint32_t flags = pkt->req->getFlags(); - bool storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; // Do paging protection checks. bool inUser diff --git a/src/mem/request.hh b/src/mem/request.hh index f6c975ac1b..3b884a9e29 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -157,6 +157,8 @@ class Request /** This request is for a memory swap. */ MEM_SWAP = 0x00400000, MEM_SWAP_COND = 0x00800000, + /** This request is a read which will be followed by a write. */ + READ_MODIFY_WRITE = 0x00020000, /** The request is a prefetch. 
*/ PREFETCH = 0x01000000, @@ -939,14 +941,22 @@ class Request bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); } bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); } bool isInstFetch() const { return _flags.isSet(INST_FETCH); } - bool isPrefetch() const { return (_flags.isSet(PREFETCH) || - _flags.isSet(PF_EXCLUSIVE)); } + bool + isPrefetch() const + { + return (_flags.isSet(PREFETCH | PF_EXCLUSIVE)); + } bool isPrefetchEx() const { return _flags.isSet(PF_EXCLUSIVE); } bool isLLSC() const { return _flags.isSet(LLSC); } bool isPriv() const { return _flags.isSet(PRIVILEGED); } bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); } - bool isSwap() const { return _flags.isSet(MEM_SWAP|MEM_SWAP_COND); } + bool isSwap() const { return _flags.isSet(MEM_SWAP | MEM_SWAP_COND); } bool isCondSwap() const { return _flags.isSet(MEM_SWAP_COND); } + bool + isReadModifyWrite() const + { + return _flags.isSet(LOCKED_RMW | READ_MODIFY_WRITE); + } bool isSecure() const { return _flags.isSet(SECURE); } bool isPTWalk() const { return _flags.isSet(PT_WALK); } bool isRelease() const { return _flags.isSet(RELEASE); } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index ca82052a73..77ab1700ed 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -725,14 +725,7 @@ Sequencer::makeRequest(PacketPtr pkt) } else if (pkt->req->isInstFetch()) { primary_type = secondary_type = RubyRequestType_IFETCH; } else { - bool storeCheck = false; - // only X86 need the store check - if (system->getArch() == Arch::X86ISA) { - uint32_t flags = pkt->req->getFlags(); - storeCheck = flags & - (X86ISA::StoreCheck << X86ISA::FlagShift); - } - if (storeCheck) { + if (pkt->req->isReadModifyWrite()) { primary_type = RubyRequestType_RMW_Read; secondary_type = RubyRequestType_ST; } else {