From 00187b7bc3ec908ee03e6fc5a3f1408fb91daed4 Mon Sep 17 00:00:00 2001
From: Gabe Black
Date: Wed, 28 Jul 2021 02:16:53 -0700
Subject: [PATCH] x86,mem: Replace the x86 StoreCheck flag with
 READ_MODIFY_WRITE.

X86 had a private/arch specific request flag called StoreCheck which it
used to signal to the TLB that it should fault on a load if it would
have faulted had it been a store. That way, you can detect whether a
read-modify-write type of operation is going to fail due to a
translation problem during the read, and don't have to worry about not
doing anything architecturally visible until the store had succeeded,
while also making sure not to do the store part if the modify part
could fail.

It seems that Ruby had hijacked that flag and had an architecture
specific check which was looking for a load which was going to be
followed by a store. The x86 flag was never intended to communicate
that beyond the TLB, and this nominally architecture agnostic component
shouldn't be reaching into the ISA specific flags to try to get that
information.

Instead, this change introduces a new Request flag called
READ_MODIFY_WRITE which is used for the same purpose in x86, but in
general means that a load will be followed by a write in the near
future. With this new globally applicable flag, the ruby Sequencer
class no longer needs to check what the arch is, nor does it need to
access ISA private data in the request flags.

Always doing this check should be no less efficient than before,
because checking the arch involved calling into the system object,
while checking the flag only requires masking a bit on the flags which
the compiler probably already has floating around for other logic in
this function.

Change-Id: Ied5b744d31e7aa8bf25e399b6b321f9d2020a92f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48710
Tested-by: kokoro
Reviewed-by: Bobby R.
Bruce Maintainer: Gabe Black --- src/arch/x86/isa/microops/ldstop.isa | 8 ++++---- src/arch/x86/ldstflags.hh | 1 - src/arch/x86/tlb.cc | 2 +- src/gpu-compute/gpu_tlb.cc | 4 ++-- src/mem/request.hh | 16 +++++++++++++--- src/mem/ruby/system/Sequencer.cc | 9 +-------- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/arch/x86/isa/microops/ldstop.isa b/src/arch/x86/isa/microops/ldstop.isa index 4fb953acca..3c3b7e4847 100644 --- a/src/arch/x86/isa/microops/ldstop.isa +++ b/src/arch/x86/isa/microops/ldstop.isa @@ -518,10 +518,10 @@ let {{ implicitStack=True) defineMicroLoadOp('Ldst', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', - '(StoreCheck << FlagShift)') + 'Request::READ_MODIFY_WRITE') defineMicroLoadOp('Ldstl', 'Data = merge(Data, data, Mem, dataSize);', 'Data = Mem & mask(dataSize * 8);', - '(StoreCheck << FlagShift) | Request::LOCKED_RMW', + 'Request::READ_MODIFY_WRITE | Request::LOCKED_RMW', nonSpec=True) defineMicroLoadOp('Ldfp', code='FpData_uqw = Mem', big=False, @@ -599,10 +599,10 @@ let {{ ''' defineMicroLoadSplitOp('LdSplit', code, - '(StoreCheck << FlagShift)') + 'Request::READ_MODIFY_WRITE') defineMicroLoadSplitOp('LdSplitl', code, - '(StoreCheck << FlagShift) | Request::LOCKED_RMW', + 'Request::READ_MODIFY_WRITE | Request::LOCKED_RMW', nonSpec=True) def defineMicroStoreOp(mnemonic, code, completeCode="", mem_flags="0", diff --git a/src/arch/x86/ldstflags.hh b/src/arch/x86/ldstflags.hh index ab6fa2c763..7465d570ea 100644 --- a/src/arch/x86/ldstflags.hh +++ b/src/arch/x86/ldstflags.hh @@ -56,7 +56,6 @@ namespace X86ISA { CPL0FlagBit = 1, AddrSizeFlagBit = 2, - StoreCheck = 4 }; } // namespace X86ISA } // namespace gem5 diff --git a/src/arch/x86/tlb.cc b/src/arch/x86/tlb.cc index 00e1120f4e..9c1a9dae5f 100644 --- a/src/arch/x86/tlb.cc +++ b/src/arch/x86/tlb.cc @@ -313,7 +313,7 @@ TLB::translate(const RequestPtr &req, { Request::Flags flags = req->getFlags(); int seg = flags & SegmentFlagMask; - bool 
storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; delayedResponse = false; diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc index 9560b4de10..e2225a0ffd 100644 --- a/src/gpu-compute/gpu_tlb.cc +++ b/src/gpu-compute/gpu_tlb.cc @@ -424,7 +424,7 @@ namespace X86ISA { uint32_t flags = req->getFlags(); int seg = flags & SegmentFlagMask; - bool storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; // If this is true, we're dealing with a request // to a non-memory address space. @@ -764,7 +764,7 @@ namespace X86ISA { HandyM5Reg m5Reg = tc->readMiscRegNoEffect(MISCREG_M5_REG); uint32_t flags = pkt->req->getFlags(); - bool storeCheck = flags & (StoreCheck << FlagShift); + bool storeCheck = flags & Request::READ_MODIFY_WRITE; // Do paging protection checks. bool inUser diff --git a/src/mem/request.hh b/src/mem/request.hh index f6c975ac1b..3b884a9e29 100644 --- a/src/mem/request.hh +++ b/src/mem/request.hh @@ -157,6 +157,8 @@ class Request /** This request is for a memory swap. */ MEM_SWAP = 0x00400000, MEM_SWAP_COND = 0x00800000, + /** This request is a read which will be followed by a write. */ + READ_MODIFY_WRITE = 0x00020000, /** The request is a prefetch. 
*/ PREFETCH = 0x01000000, @@ -939,14 +941,22 @@ class Request bool isUncacheable() const { return _flags.isSet(UNCACHEABLE); } bool isStrictlyOrdered() const { return _flags.isSet(STRICT_ORDER); } bool isInstFetch() const { return _flags.isSet(INST_FETCH); } - bool isPrefetch() const { return (_flags.isSet(PREFETCH) || - _flags.isSet(PF_EXCLUSIVE)); } + bool + isPrefetch() const + { + return (_flags.isSet(PREFETCH | PF_EXCLUSIVE)); + } bool isPrefetchEx() const { return _flags.isSet(PF_EXCLUSIVE); } bool isLLSC() const { return _flags.isSet(LLSC); } bool isPriv() const { return _flags.isSet(PRIVILEGED); } bool isLockedRMW() const { return _flags.isSet(LOCKED_RMW); } - bool isSwap() const { return _flags.isSet(MEM_SWAP|MEM_SWAP_COND); } + bool isSwap() const { return _flags.isSet(MEM_SWAP | MEM_SWAP_COND); } bool isCondSwap() const { return _flags.isSet(MEM_SWAP_COND); } + bool + isReadModifyWrite() const + { + return _flags.isSet(LOCKED_RMW | READ_MODIFY_WRITE); + } bool isSecure() const { return _flags.isSet(SECURE); } bool isPTWalk() const { return _flags.isSet(PT_WALK); } bool isRelease() const { return _flags.isSet(RELEASE); } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index ca82052a73..77ab1700ed 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -725,14 +725,7 @@ Sequencer::makeRequest(PacketPtr pkt) } else if (pkt->req->isInstFetch()) { primary_type = secondary_type = RubyRequestType_IFETCH; } else { - bool storeCheck = false; - // only X86 need the store check - if (system->getArch() == Arch::X86ISA) { - uint32_t flags = pkt->req->getFlags(); - storeCheck = flags & - (X86ISA::StoreCheck << X86ISA::FlagShift); - } - if (storeCheck) { + if (pkt->req->isReadModifyWrite()) { primary_type = RubyRequestType_RMW_Read; secondary_type = RubyRequestType_ST; } else {